From cacd732d8abbdbdcea1859b0aedf46fec6b05437 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Wed, 9 Mar 2022 08:54:45 -0700 Subject: [PATCH 01/28] DAOS-9623 control: Enable multi-provider in server (#8396) - Add secondary providers to internal DAOS gRPC/dRPC messages. - Add secondary providers and URIs to member database. - Parse multiple providers and their corresponding interfaces/ports from the yaml config file. Signed-off-by: Kris Jacque --- src/control/cmd/daos_server/network.go | 7 +- src/control/cmd/dmg/auto_test.go | 4 +- src/control/common/proto/mgmt/svc.pb.go | 242 ++++++++++++-------- src/control/common/proto/mgmt/system.pb.go | 157 +++++++------ src/control/lib/control/auto.go | 2 +- src/control/lib/control/system_test.go | 10 +- src/control/server/config/server.go | 2 +- src/control/server/ctl_network_rpc.go | 6 +- src/control/server/engine/config.go | 156 +++++++++++-- src/control/server/engine/config_test.go | 249 ++++++++++++++++++++- src/control/server/mgmt_svc.go | 6 +- src/control/server/mgmt_system.go | 75 ++++--- src/control/server/mgmt_system_test.go | 29 ++- src/control/server/server.go | 42 +++- src/control/server/server_utils.go | 57 +++-- src/control/server/server_utils_test.go | 13 +- src/control/system/database.go | 23 +- src/control/system/database_test.go | 34 +-- src/control/system/member.go | 51 +++-- src/control/system/membership.go | 41 ++-- src/control/system/membership_test.go | 107 ++++----- src/control/system/mocks.go | 4 +- src/mgmt/svc.pb-c.c | 70 +++++- src/mgmt/svc.pb-c.h | 32 ++- src/proto/mgmt/svc.proto | 30 +-- src/proto/mgmt/system.proto | 3 +- 26 files changed, 1039 insertions(+), 413 deletions(-) diff --git a/src/control/cmd/daos_server/network.go b/src/control/cmd/daos_server/network.go index 58be3a75940..f6bbb171289 100644 --- a/src/control/cmd/daos_server/network.go +++ b/src/control/cmd/daos_server/network.go @@ -37,7 +37,12 @@ func (cmd *networkScanCmd) Execute(_ []string) error { } if cmd.FabricProvider == "" 
{ - cmd.FabricProvider = cmd.config.Fabric.Provider + prov, err := cmd.config.Fabric.GetPrimaryProvider() + if err != nil { + return err + } + + cmd.FabricProvider = prov } hf := fabricInterfaceSetToHostFabric(results, cmd.FabricProvider) diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index 96d5649368d..3e0df31117d 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -141,7 +141,7 @@ engines: - "0000:03:00.0" provider: ofi+verbs fabric_iface: ib0 - fabric_iface_port: 31416 + fabric_iface_port: "31416" pinned_numa_node: 0 - targets: 6 nr_xs_helpers: 0 @@ -159,7 +159,7 @@ engines: - "0000:06:00.0" provider: ofi+verbs fabric_iface: ib1 - fabric_iface_port: 32416 + fabric_iface_port: "32416" pinned_numa_node: 1 disable_vfio: false enable_vmd: false diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 2048643ea13..a87c5043fae 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2018-2021 Intel Corporation. +// (C) Copyright 2018-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -7,7 +7,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.26.0 -// protoc v3.12.4 +// protoc v3.6.1 // source: mgmt/svc.proto package mgmt @@ -227,15 +227,16 @@ type JoinReq struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system name. - Uuid string `protobuf:"bytes,2,opt,name=uuid,proto3" json:"uuid,omitempty"` // Server UUID. - Rank uint32 `protobuf:"varint,3,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank desired, if not MAX_UINT32. - Uri string `protobuf:"bytes,4,opt,name=uri,proto3" json:"uri,omitempty"` // Server CaRT base URI (i.e., for context 0). 
- Nctxs uint32 `protobuf:"varint,5,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Server CaRT context count. - Addr string `protobuf:"bytes,6,opt,name=addr,proto3" json:"addr,omitempty"` // Server management address. - SrvFaultDomain string `protobuf:"bytes,7,opt,name=srvFaultDomain,proto3" json:"srvFaultDomain,omitempty"` // Fault domain for this instance's server - Idx uint32 `protobuf:"varint,8,opt,name=idx,proto3" json:"idx,omitempty"` // Instance index on server node. - Incarnation uint64 `protobuf:"varint,9,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // rank incarnation + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system name. + Uuid string `protobuf:"bytes,2,opt,name=uuid,proto3" json:"uuid,omitempty"` // Server UUID. + Rank uint32 `protobuf:"varint,3,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank desired, if not MAX_UINT32. + Uri string `protobuf:"bytes,4,opt,name=uri,proto3" json:"uri,omitempty"` // Server CaRT primary provider URI (i.e., for context 0). + Nctxs uint32 `protobuf:"varint,5,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Server CaRT context count. + Addr string `protobuf:"bytes,6,opt,name=addr,proto3" json:"addr,omitempty"` // Server management address. + SrvFaultDomain string `protobuf:"bytes,7,opt,name=srvFaultDomain,proto3" json:"srvFaultDomain,omitempty"` // Fault domain for this instance's server + Idx uint32 `protobuf:"varint,8,opt,name=idx,proto3" json:"idx,omitempty"` // Instance index on server node. 
+ Incarnation uint64 `protobuf:"varint,9,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // rank incarnation + SecondaryUris []string `protobuf:"bytes,10,rep,name=secondary_uris,json=secondaryUris,proto3" json:"secondary_uris,omitempty"` // URIs for any secondary providers } func (x *JoinReq) Reset() { @@ -333,6 +334,13 @@ func (x *JoinReq) GetIncarnation() uint64 { return 0 } +func (x *JoinReq) GetSecondaryUris() []string { + if x != nil { + return x.SecondaryUris + } + return nil +} + type JoinResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -671,11 +679,13 @@ type GetAttachInfoResp struct { unknownFields protoimpl.UnknownFields Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code - RankUris []*GetAttachInfoResp_RankUri `protobuf:"bytes,2,rep,name=rank_uris,json=rankUris,proto3" json:"rank_uris,omitempty"` // Rank URIs + RankUris []*GetAttachInfoResp_RankUri `protobuf:"bytes,2,rep,name=rank_uris,json=rankUris,proto3" json:"rank_uris,omitempty"` // Rank URIs for the primary provider // These CaRT settings are shared with the // libdaos client to aid in CaRT initialization. 
- MsRanks []uint32 `protobuf:"varint,3,rep,packed,name=ms_ranks,json=msRanks,proto3" json:"ms_ranks,omitempty"` // Ranks local to MS replicas - ClientNetHint *ClientNetHint `protobuf:"bytes,4,opt,name=client_net_hint,json=clientNetHint,proto3" json:"client_net_hint,omitempty"` + MsRanks []uint32 `protobuf:"varint,3,rep,packed,name=ms_ranks,json=msRanks,proto3" json:"ms_ranks,omitempty"` // Ranks local to MS replicas + ClientNetHint *ClientNetHint `protobuf:"bytes,4,opt,name=client_net_hint,json=clientNetHint,proto3" json:"client_net_hint,omitempty"` // Primary provider hint + SecondaryRankUris []*GetAttachInfoResp_RankUri `protobuf:"bytes,5,rep,name=secondary_rank_uris,json=secondaryRankUris,proto3" json:"secondary_rank_uris,omitempty"` // Rank URIs for additional providers + SecondaryClientNetHints []*ClientNetHint `protobuf:"bytes,6,rep,name=secondary_client_net_hints,json=secondaryClientNetHints,proto3" json:"secondary_client_net_hints,omitempty"` // Hints for additional providers } func (x *GetAttachInfoResp) Reset() { @@ -738,6 +748,20 @@ func (x *GetAttachInfoResp) GetClientNetHint() *ClientNetHint { return nil } +func (x *GetAttachInfoResp) GetSecondaryRankUris() []*GetAttachInfoResp_RankUri { + if x != nil { + return x.SecondaryRankUris + } + return nil +} + +func (x *GetAttachInfoResp) GetSecondaryClientNetHints() []*ClientNetHint { + if x != nil { + return x.SecondaryClientNetHints + } + return nil +} + type PrepShutdownReq struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -956,7 +980,7 @@ type GroupUpdateReq_Engine struct { unknownFields protoimpl.UnknownFields Rank uint32 `protobuf:"varint,1,opt,name=rank,proto3" json:"rank,omitempty"` - Uri string `protobuf:"bytes,2,opt,name=uri,proto3" json:"uri,omitempty"` + Uri string `protobuf:"bytes,2,opt,name=uri,proto3" json:"uri,omitempty"` // primary URI is the only one group update is concerned with } func (x *GroupUpdateReq_Engine) Reset() { @@ -1010,8 +1034,9 @@ type 
GetAttachInfoResp_RankUri struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Rank uint32 `protobuf:"varint,1,opt,name=rank,proto3" json:"rank,omitempty"` - Uri string `protobuf:"bytes,2,opt,name=uri,proto3" json:"uri,omitempty"` + Rank uint32 `protobuf:"varint,1,opt,name=rank,proto3" json:"rank,omitempty"` + Uri string `protobuf:"bytes,2,opt,name=uri,proto3" json:"uri,omitempty"` + Provider string `protobuf:"bytes,3,opt,name=provider,proto3" json:"provider,omitempty"` } func (x *GetAttachInfoResp_RankUri) Reset() { @@ -1060,6 +1085,13 @@ func (x *GetAttachInfoResp_RankUri) GetUri() string { return "" } +func (x *GetAttachInfoResp_RankUri) GetProvider() string { + if x != nil { + return x.Provider + } + return "" +} + var File_mgmt_svc_proto protoreflect.FileDescriptor var file_mgmt_svc_proto_rawDesc = []byte{ @@ -1079,7 +1111,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x29, 0x0a, 0x0f, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x22, 0xdb, 0x01, 0x0a, 0x07, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, + 0x22, 0x82, 0x02, 0x0a, 0x07, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, @@ -1092,81 +1124,95 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x03, 0x69, 0x64, 0x78, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 
0x6f, 0x6e, 0x18, 0x09, 0x20, 0x01, 0x28, - 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xbc, - 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, 0x6f, - 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, - 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, - 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, 0x6f, - 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, - 0x6f, 0x69, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x06, 0x0a, 0x02, - 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x22, 0x22, 0x0a, - 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, - 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, - 0x73, 0x22, 0x53, 0x0a, 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x52, 0x65, 0x73, 0x70, 0x12, 0x24, 0x0a, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, - 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, - 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, - 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 
0x65, - 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, - 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, - 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0xf3, 0x01, 0x0a, 0x0d, 0x43, 0x6c, - 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, - 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, - 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, - 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, - 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, - 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, - 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, - 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, - 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, - 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, - 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x22, - 0xf2, 0x01, 0x0a, 0x11, 0x47, 
0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, - 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, - 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, - 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, - 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, - 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, - 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x25, + 0x0a, 0x0e, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x75, 0x72, 0x69, 0x73, + 0x18, 0x0a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, + 0x79, 0x55, 0x72, 0x69, 0x73, 0x22, 0xbc, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, + 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, + 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, + 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 
0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, + 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, + 0x55, 0x54, 0x10, 0x01, 0x22, 0x22, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x53, 0x0a, 0x0f, 0x4c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x24, 0x0a, 0x0d, 0x63, + 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, + 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, + 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x22, 0xf3, 0x01, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, + 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, + 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 
0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, + 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, + 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, + 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, + 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, + 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, + 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, + 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, + 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, + 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, + 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x22, 0xb1, 0x03, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, + 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, + 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, + 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, + 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, + 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 
0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, + 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, + 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, + 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, + 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, + 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, + 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x11, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, + 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, + 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, + 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, - 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, - 0x69, 0x6e, 0x74, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, - 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x75, 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, - 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, - 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 
0x22, 0x20, - 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, - 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, - 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, - 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, - 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, - 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, - 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, - 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, - 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x1a, 0x4b, 0x0a, + 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, + 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x1a, + 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x08, 
0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, + 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, + 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, + 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x20, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, + 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, + 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, + 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, + 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, + 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, + 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, + 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, + 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, + 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, + 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, + 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1207,11 +1253,13 @@ var file_mgmt_svc_proto_depIdxs = []int32{ 0, // 1: 
mgmt.JoinResp.state:type_name -> mgmt.JoinResp.State 16, // 2: mgmt.GetAttachInfoResp.rank_uris:type_name -> mgmt.GetAttachInfoResp.RankUri 9, // 3: mgmt.GetAttachInfoResp.client_net_hint:type_name -> mgmt.ClientNetHint - 4, // [4:4] is the sub-list for method output_type - 4, // [4:4] is the sub-list for method input_type - 4, // [4:4] is the sub-list for extension type_name - 4, // [4:4] is the sub-list for extension extendee - 0, // [0:4] is the sub-list for field type_name + 16, // 4: mgmt.GetAttachInfoResp.secondary_rank_uris:type_name -> mgmt.GetAttachInfoResp.RankUri + 9, // 5: mgmt.GetAttachInfoResp.secondary_client_net_hints:type_name -> mgmt.ClientNetHint + 6, // [6:6] is the sub-list for method output_type + 6, // [6:6] is the sub-list for method input_type + 6, // [6:6] is the sub-list for extension type_name + 6, // [6:6] is the sub-list for extension extendee + 0, // [0:6] is the sub-list for field type_name } func init() { file_mgmt_svc_proto_init() } diff --git a/src/control/common/proto/mgmt/system.pb.go b/src/control/common/proto/mgmt/system.pb.go index 6830b2031dc..2cdea9b2086 100644 --- a/src/control/common/proto/mgmt/system.pb.go +++ b/src/control/common/proto/mgmt/system.pb.go @@ -6,8 +6,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.27.1-devel -// protoc v3.5.0 +// protoc-gen-go v1.26.0 +// protoc v3.6.1 // source: mgmt/system.proto package mgmt @@ -42,9 +42,10 @@ type SystemMember struct { FabricUri string `protobuf:"bytes,6,opt,name=fabric_uri,json=fabricUri,proto3" json:"fabric_uri,omitempty"` FabricContexts uint32 `protobuf:"varint,7,opt,name=fabric_contexts,json=fabricContexts,proto3" json:"fabric_contexts,omitempty"` // ancillary info e.g. 
error msg or reason for state change - Info string `protobuf:"bytes,8,opt,name=info,proto3" json:"info,omitempty"` - FaultDomain string `protobuf:"bytes,9,opt,name=fault_domain,json=faultDomain,proto3" json:"fault_domain,omitempty"` - LastUpdate string `protobuf:"bytes,10,opt,name=last_update,json=lastUpdate,proto3" json:"last_update,omitempty"` + Info string `protobuf:"bytes,8,opt,name=info,proto3" json:"info,omitempty"` + FaultDomain string `protobuf:"bytes,9,opt,name=fault_domain,json=faultDomain,proto3" json:"fault_domain,omitempty"` + LastUpdate string `protobuf:"bytes,10,opt,name=last_update,json=lastUpdate,proto3" json:"last_update,omitempty"` + SecondaryFabricUris []string `protobuf:"bytes,11,rep,name=secondary_fabric_uris,json=secondaryFabricUris,proto3" json:"secondary_fabric_uris,omitempty"` } func (x *SystemMember) Reset() { @@ -149,6 +150,13 @@ func (x *SystemMember) GetLastUpdate() string { return "" } +func (x *SystemMember) GetSecondaryFabricUris() []string { + if x != nil { + return x.SecondaryFabricUris + } + return nil +} + // SystemStopReq supplies system shutdown parameters. 
type SystemStopReq struct { state protoimpl.MessageState @@ -834,7 +842,7 @@ var File_mgmt_system_proto protoreflect.FileDescriptor var file_mgmt_system_proto_rawDesc = []byte{ 0x0a, 0x11, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x1a, 0x12, 0x73, 0x68, 0x61, 0x72, 0x65, - 0x64, 0x2f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xa2, 0x02, + 0x64, 0x2f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xd6, 0x02, 0x0a, 0x0c, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x61, 0x64, 0x64, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x61, 0x64, 0x64, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, @@ -853,77 +861,80 @@ var file_mgmt_system_proto_rawDesc = []byte{ 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x22, 0x8b, 0x01, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x6f, - 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x72, 0x65, 0x70, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x70, 0x72, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x69, - 0x6c, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x6b, 0x69, 0x6c, 0x6c, 0x12, 0x14, - 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, - 0x6f, 0x72, 0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, - 0x73, 
0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, - 0x22, 0x82, 0x01, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x6f, 0x70, 0x52, - 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, - 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x74, 0x65, 0x12, 0x32, 0x0a, 0x15, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, + 0x66, 0x61, 0x62, 0x72, 0x69, 0x63, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x0b, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x13, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x46, 0x61, 0x62, 0x72, + 0x69, 0x63, 0x55, 0x72, 0x69, 0x73, 0x22, 0x8b, 0x01, 0x0a, 0x0d, 0x53, 0x79, 0x73, 0x74, 0x65, + 0x6d, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x72, + 0x65, 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x70, 0x72, 0x65, 0x70, 0x12, 0x12, + 0x0a, 0x04, 0x6b, 0x69, 0x6c, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x6b, 0x69, + 0x6c, 0x6c, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, + 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, + 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, + 0x6f, 0x73, 0x74, 0x73, 0x22, 0x82, 0x01, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, + 0x74, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, + 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 
0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, + 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, + 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, + 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x4e, 0x0a, 0x0e, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, + 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0f, 0x53, 0x79, + 0x73, 0x74, 0x65, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, + 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, + 0x2e, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, + 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, + 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, + 0x4e, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 
0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, + 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, + 0x83, 0x01, 0x0a, 0x0f, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, + 0x65, 0x6d, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x52, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, - 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x4e, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, - 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, - 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, - 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, - 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0f, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, - 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, - 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, - 0x72, 0x65, 0x73, 0x75, 
0x6c, 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, - 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, - 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, - 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, - 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x4e, 0x0a, 0x0e, 0x53, - 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, - 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, - 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0f, - 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x2c, 0x0a, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, - 0x6d, 0x62, 0x65, 0x72, 0x52, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x12, 0x20, 0x0a, - 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, - 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, - 0x73, 0x22, 0x22, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, - 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x3f, 0x0a, 0x0f, 0x53, 
0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, - 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, 0x61, 0x72, - 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x3e, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, - 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x18, 0x0a, 0x07, - 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, - 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0xbe, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, - 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x3f, 0x0a, 0x07, - 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, - 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x1a, 0x68, 0x0a, - 0x0d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x73, 0x67, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x6d, 0x73, 0x67, 0x12, 0x17, 0x0a, 0x07, 0x70, 0x6f, 0x6f, 0x6c, - 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, 0x6f, 0x6f, 0x6c, 0x49, - 0x64, 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, - 
0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, - 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, - 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x22, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, + 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x3f, 0x0a, 0x0f, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, + 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, + 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x3e, 0x0a, 0x10, 0x53, 0x79, + 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, + 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, + 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0xbe, 0x01, 0x0a, 0x11, 0x53, + 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x3f, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, + 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6c, 0x65, 0x61, 0x6e, + 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x73, 0x1a, 0x68, 0x0a, 0x0d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 
0x70, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x73, + 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6d, 0x73, 0x67, 0x12, 0x17, 0x0a, 0x07, + 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, + 0x6f, 0x6f, 0x6c, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x42, 0x3a, 0x5a, 0x38, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, + 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, + 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/control/auto.go b/src/control/lib/control/auto.go index 22a9f6926e4..26a23de8819 100644 --- a/src/control/lib/control/auto.go +++ b/src/control/lib/control/auto.go @@ -621,7 +621,7 @@ func genConfig(log logging.Logger, newEngineCfg newEngineCfgFn, accessPoints []s engineCfg.Fabric = engine.FabricConfig{ Provider: nd.numaIfaces[nn].Provider, Interface: nd.numaIfaces[nn].Device, - InterfacePort: int(defaultFiPort + (nn * defaultFiPortInterval)), + InterfacePort: fmt.Sprintf("%d", defaultFiPort+(nn*defaultFiPortInterval)), } engines = append(engines, engineCfg) diff --git a/src/control/lib/control/system_test.go b/src/control/lib/control/system_test.go index 306f56d67c2..a3e65d42fd9 100644 --- a/src/control/lib/control/system_test.go +++ b/src/control/lib/control/system_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -621,10 +621,10 @@ func TestControl_SystemQuery(t *testing.T) { ), expResp: &SystemQueryResp{ Members: system.Members{ - system.NewMember(1, common.MockUUID(1), "", common.MockHostAddr(1), system.MemberStateReady).WithFaultDomain(fds[1]), - system.NewMember(2, common.MockUUID(2), "", common.MockHostAddr(1), system.MemberStateReady).WithFaultDomain(fds[2]), - system.NewMember(0, common.MockUUID(0), "", common.MockHostAddr(2), system.MemberStateStopped).WithFaultDomain(fds[0]), - system.NewMember(3, common.MockUUID(3), "", common.MockHostAddr(2), system.MemberStateStopped).WithFaultDomain(fds[3]), + system.NewMember(1, common.MockUUID(1), nil, common.MockHostAddr(1), system.MemberStateReady).WithFaultDomain(fds[1]), + system.NewMember(2, common.MockUUID(2), nil, common.MockHostAddr(1), system.MemberStateReady).WithFaultDomain(fds[2]), + system.NewMember(0, common.MockUUID(0), nil, common.MockHostAddr(2), system.MemberStateStopped).WithFaultDomain(fds[0]), + system.NewMember(3, common.MockUUID(3), nil, common.MockHostAddr(2), system.MemberStateStopped).WithFaultDomain(fds[3]), }, }, }, diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index b211e8834ac..8ca09468d30 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -574,7 +574,7 @@ func (cfg *Server) validateMultiServerConfig(log logging.Logger) error { seenHelperStreamCount := -1 for idx, engine := range cfg.Engines { - fabricConfig := fmt.Sprintf("fabric:%s-%s-%d", + fabricConfig := fmt.Sprintf("fabric:%q-%q-%q", engine.Fabric.Provider, engine.Fabric.Interface, engine.Fabric.InterfacePort) diff --git a/src/control/server/ctl_network_rpc.go b/src/control/server/ctl_network_rpc.go index 3f0bbf8f0a1..7e64fa879ba 100644 --- a/src/control/server/ctl_network_rpc.go +++ b/src/control/server/ctl_network_rpc.go @@ -20,7 +20,11 @@ import ( func (c *ControlService) NetworkScan(ctx 
context.Context, req *ctlpb.NetworkScanReq) (*ctlpb.NetworkScanResp, error) { c.log.Debugf("NetworkScanDevices() Received request: %s", req.GetProvider()) - provider := c.srvCfg.Fabric.Provider + provider, err := c.srvCfg.Fabric.GetPrimaryProvider() + if err != nil { + return nil, err + } + switch { case strings.EqualFold(req.GetProvider(), "all"): provider = "" diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index e0c142aaf06..e334850d725 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -7,6 +7,8 @@ package engine import ( + "fmt" + "strconv" "strings" "github.com/pkg/errors" @@ -17,13 +19,18 @@ import ( "github.com/daos-stack/daos/src/control/system" ) -const maxHelperStreamCount = 2 +const ( + maxHelperStreamCount = 2 + + // MultiProviderSeparator delineates between providers in a multi-provider config. + MultiProviderSeparator = " " +) // FabricConfig encapsulates networking fabric configuration. type FabricConfig struct { Provider string `yaml:"provider,omitempty" cmdEnv:"CRT_PHY_ADDR_STR"` Interface string `yaml:"fabric_iface,omitempty" cmdEnv:"OFI_INTERFACE"` - InterfacePort int `yaml:"fabric_iface_port,omitempty" cmdEnv:"OFI_PORT,nonzero"` + InterfacePort string `yaml:"fabric_iface_port,omitempty" cmdEnv:"OFI_PORT"` NumaNodeIndex uint `yaml:"-"` BypassHealthChk *bool `yaml:"bypass_health_chk,omitempty" cmdLongFlag:"--bypass_health_chk" cmdShortFlag:"-b"` CrtCtxShareAddr uint32 `yaml:"crt_ctx_share_addr,omitempty" cmdEnv:"CRT_CTX_SHARE_ADDR"` @@ -31,6 +38,89 @@ type FabricConfig struct { DisableSRX bool `yaml:"disable_srx,omitempty" cmdEnv:"FI_OFI_RXM_USE_SRX,invertBool,intBool"` } +// GetPrimaryProvider parses the primary provider from the Provider string. 
+func (fc *FabricConfig) GetPrimaryProvider() (string, error) { + providers, err := fc.GetProviders() + if err != nil { + return "", err + } + + return providers[0], nil +} + +// GetProviders parses the Provider string to one or more providers. +func (fc *FabricConfig) GetProviders() ([]string, error) { + if fc == nil { + return nil, errors.New("FabricConfig is nil") + } + + providers := splitMultiProviderStr(fc.Provider) + if len(providers) == 0 { + return nil, errors.New("provider not set") + } + + return providers, nil +} + +func splitMultiProviderStr(str string) []string { + strs := strings.Split(str, MultiProviderSeparator) + result := make([]string, 0) + for _, s := range strs { + trimmed := strings.TrimSpace(s) + if trimmed != "" { + result = append(result, trimmed) + } + } + + return result +} + +// GetPrimaryInterface parses the primary fabric interface from the Interface string. +func (fc *FabricConfig) GetPrimaryInterface() (string, error) { + interfaces, err := fc.GetInterfaces() + if err != nil { + return "", err + } + + return interfaces[0], nil +} + +// GetInterfaces parses the Interface string into one or more interfaces. +func (fc *FabricConfig) GetInterfaces() ([]string, error) { + if fc == nil { + return nil, errors.New("FabricConfig is nil") + } + + interfaces := splitMultiProviderStr(fc.Interface) + if len(interfaces) == 0 { + return nil, errors.New("fabric_iface not set") + } + + return interfaces, nil +} + +// GetInterfacePorts parses the InterfacePort string to one or more ports. 
+func (fc *FabricConfig) GetInterfacePorts() ([]int, error) { + if fc == nil { + return nil, errors.New("FabricConfig is nil") + } + + portStrs := splitMultiProviderStr(fc.InterfacePort) + if len(portStrs) == 0 { + return nil, errors.New("fabric_iface_port not set") + } + + ports := make([]int, 0) + for _, str := range portStrs { + intPort, err := strconv.Atoi(str) + if err != nil { + return nil, err + } + ports = append(ports, intPort) + } + return ports, nil +} + // Update fills in any missing fields from the provided FabricConfig. func (fc *FabricConfig) Update(other FabricConfig) { if fc.Provider == "" { @@ -39,7 +129,7 @@ func (fc *FabricConfig) Update(other FabricConfig) { if fc.Interface == "" { fc.Interface = other.Interface } - if fc.InterfacePort == 0 { + if fc.InterfacePort == "" { fc.InterfacePort = other.InterfacePort } if fc.CrtCtxShareAddr == 0 { @@ -52,18 +142,32 @@ func (fc *FabricConfig) Update(other FabricConfig) { // Validate ensures that the configuration meets minimum standards. 
func (fc *FabricConfig) Validate() error { - switch { - case fc.Provider == "": - return errors.New("provider not set") - case fc.Interface == "": - return errors.New("fabric_iface not set") - case fc.InterfacePort == 0: - return errors.New("fabric_iface_port not set") - case fc.InterfacePort < 0: - return errors.New("fabric_iface_port cannot be negative") - default: - return nil + prov, err := fc.GetProviders() + if err != nil { + return err + } + + interfaces, err := fc.GetInterfaces() + if err != nil { + return err + } + + ports, err := fc.GetInterfacePorts() + if err != nil { + return err + } + + for _, p := range ports { + if p < 0 { + return errors.New("fabric_iface_port cannot be negative") + } } + + if len(prov) != len(interfaces) || len(prov) != len(ports) { + return errors.Errorf("provider, fabric_iface and fabric_iface_port must include the same number of items delimited by %q", MultiProviderSeparator) + } + + return nil } // cleanEnvVars scrubs the supplied slice of environment @@ -170,12 +274,22 @@ func NewConfig() *Config { } // setAffinity ensures engine NUMA locality is assigned and valid. 
-func (c *Config) setAffinity(log logging.Logger, fis *hardware.FabricInterfaceSet) (err error) { +func (c *Config) setAffinity(log logging.Logger, fis *hardware.FabricInterfaceSet) error { + iface, err := c.Fabric.GetPrimaryInterface() + if err != nil { + return err + } + var fi *hardware.FabricInterface if fis != nil { - fi, err = fis.GetInterfaceOnOSDevice(c.Fabric.Interface, c.Fabric.Provider) + provider, err := c.Fabric.GetPrimaryProvider() if err != nil { - return + return err + } + + fi, err = fis.GetInterfaceOnOSDevice(iface, provider) + if err != nil { + return err } } @@ -186,11 +300,11 @@ func (c *Config) setAffinity(log logging.Logger, fis *hardware.FabricInterfaceSe // validate that numa node is correct for the given device if fi != nil && fi.NUMANode != *c.PinnedNumaNode { log.Errorf("misconfiguration: network interface %s is on NUMA "+ - "node %d but engine is pinned to NUMA node %d", c.Fabric.Interface, + "node %d but engine is pinned to NUMA node %d", iface, fi.NUMANode, *c.PinnedNumaNode) } - return + return nil } if fi == nil { @@ -201,7 +315,7 @@ func (c *Config) setAffinity(log logging.Logger, fis *hardware.FabricInterfaceSe c.Fabric.NumaNodeIndex = fi.NUMANode c.Storage.NumaNodeIndex = fi.NUMANode - return + return nil } // Validate ensures that the configuration meets minimum standards. @@ -368,7 +482,7 @@ func (c *Config) WithFabricInterface(iface string) *Config { // WithFabricInterfacePort sets the numeric interface port to be used by this instance. 
func (c *Config) WithFabricInterfacePort(ifacePort int) *Config { - c.Fabric.InterfacePort = ifacePort + c.Fabric.InterfacePort = fmt.Sprintf("%d", ifacePort) return c } diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index 1695c58daf7..a9a6701bc03 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -483,14 +483,14 @@ func TestConfig_FabricValidation(t *testing.T) { "missing provider": { cfg: FabricConfig{ Interface: "bar", - InterfacePort: 42, + InterfacePort: "42", }, expErr: errors.New("provider"), }, "missing interface": { cfg: FabricConfig{ Provider: "foo", - InterfacePort: 42, + InterfacePort: "42", }, expErr: errors.New("fabric_iface"), }, @@ -505,10 +505,48 @@ func TestConfig_FabricValidation(t *testing.T) { cfg: FabricConfig{ Provider: "foo", Interface: "bar", - InterfacePort: -42, + InterfacePort: "-42", }, expErr: errors.New("fabric_iface_port"), }, + "success": { + cfg: FabricConfig{ + Provider: "foo", + Interface: "bar", + InterfacePort: "42", + }, + }, + "multi provider/interface/port ok": { + cfg: FabricConfig{ + Provider: "foo bar", + Interface: "baz net", + InterfacePort: "42 128", + }, + }, + "mismatched num providers": { + cfg: FabricConfig{ + Provider: "foo", + Interface: "bar baz", + InterfacePort: "42 128", + }, + expErr: errors.New("same number"), + }, + "mismatched num interfaces": { + cfg: FabricConfig{ + Provider: "foo bar", + Interface: "baz", + InterfacePort: "42 128", + }, + expErr: errors.New("same number"), + }, + "mismatched num ports": { + cfg: FabricConfig{ + Provider: "foo bar", + Interface: "baz net", + InterfacePort: "42", + }, + expErr: errors.New("same number"), + }, } { t.Run(name, func(t *testing.T) { gotErr := tc.cfg.Validate() @@ -714,3 +752,208 @@ func TestConfig_setAffinity(t *testing.T) { }) } } + +func TestFabricConfig_GetProviders(t *testing.T) { + for name, tc := range map[string]struct { + cfg *FabricConfig + 
expProviders []string + expErr error + }{ + "nil": { + expErr: errors.New("nil"), + }, + "empty": { + cfg: &FabricConfig{}, + expErr: errors.New("provider not set"), + }, + "single": { + cfg: &FabricConfig{ + Provider: "p1", + }, + expProviders: []string{"p1"}, + }, + "multi": { + cfg: &FabricConfig{ + Provider: "p1 p2 p3", + }, + expProviders: []string{"p1", "p2", "p3"}, + }, + "excessive whitespace": { + cfg: &FabricConfig{ + Provider: " p1 p2 p3", + }, + expProviders: []string{"p1", "p2", "p3"}, + }, + } { + t.Run(name, func(t *testing.T) { + providers, err := tc.cfg.GetProviders() + + common.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expProviders, providers); diff != "" { + t.Fatalf("(-want, +got):\n%s", diff) + } + }) + } +} + +func TestFabricConfig_GetPrimaryProvider(t *testing.T) { + for name, tc := range map[string]struct { + cfg *FabricConfig + expProvider string + expErr error + }{ + "nil": { + expErr: errors.New("nil"), + }, + "empty": { + cfg: &FabricConfig{}, + expErr: errors.New("provider not set"), + }, + "single": { + cfg: &FabricConfig{ + Provider: "p1", + }, + expProvider: "p1", + }, + "multi": { + cfg: &FabricConfig{ + Provider: "p1 p2 p3", + }, + expProvider: "p1", + }, + } { + t.Run(name, func(t *testing.T) { + provider, err := tc.cfg.GetPrimaryProvider() + + common.CmpErr(t, tc.expErr, err) + common.AssertEqual(t, tc.expProvider, provider, "") + }) + } +} + +func TestFabricConfig_GetInterfaces(t *testing.T) { + for name, tc := range map[string]struct { + cfg *FabricConfig + expInterfaces []string + expErr error + }{ + "nil": { + expErr: errors.New("nil"), + }, + "empty": { + cfg: &FabricConfig{}, + expErr: errors.New("fabric_iface not set"), + }, + "single": { + cfg: &FabricConfig{ + Interface: "net1", + }, + expInterfaces: []string{"net1"}, + }, + "multi": { + cfg: &FabricConfig{ + Interface: "net1 net2 net3", + }, + expInterfaces: []string{"net1", "net2", "net3"}, + }, + "excessive whitespace": { + cfg: &FabricConfig{ + Interface: " 
net1 net2 net3 ", + }, + expInterfaces: []string{"net1", "net2", "net3"}, + }, + } { + t.Run(name, func(t *testing.T) { + interfaces, err := tc.cfg.GetInterfaces() + + common.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expInterfaces, interfaces); diff != "" { + t.Fatalf("(-want, +got):\n%s", diff) + } + }) + } +} + +func TestFabricConfig_GetPrimaryInterface(t *testing.T) { + for name, tc := range map[string]struct { + cfg *FabricConfig + expInterface string + expErr error + }{ + "nil": { + expErr: errors.New("nil"), + }, + "empty": { + cfg: &FabricConfig{}, + expErr: errors.New("fabric_iface not set"), + }, + "single": { + cfg: &FabricConfig{ + Interface: "net1", + }, + expInterface: "net1", + }, + "multi": { + cfg: &FabricConfig{ + Interface: "net0 net1 net3", + }, + expInterface: "net0", + }, + } { + t.Run(name, func(t *testing.T) { + iface, err := tc.cfg.GetPrimaryInterface() + + common.CmpErr(t, tc.expErr, err) + common.AssertEqual(t, tc.expInterface, iface, "") + }) + } +} + +func TestFabricConfig_GetInterfacePorts(t *testing.T) { + for name, tc := range map[string]struct { + cfg *FabricConfig + expPorts []int + expErr error + }{ + "nil": { + expErr: errors.New("nil"), + }, + "empty": { + cfg: &FabricConfig{}, + expErr: errors.New("fabric_iface_port not set"), + }, + "single": { + cfg: &FabricConfig{ + InterfacePort: "1234", + }, + expPorts: []int{1234}, + }, + "multi": { + cfg: &FabricConfig{ + InterfacePort: "1234 5678 9012", + }, + expPorts: []int{1234, 5678, 9012}, + }, + "excessive whitespace": { + cfg: &FabricConfig{ + InterfacePort: " 1234 5678 9012 ", + }, + expPorts: []int{1234, 5678, 9012}, + }, + "non-integer port": { + cfg: &FabricConfig{ + InterfacePort: "1234 a123", + }, + expErr: errors.New("strconv.Atoi"), + }, + } { + t.Run(name, func(t *testing.T) { + ports, err := tc.cfg.GetInterfacePorts() + + common.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expPorts, ports); diff != "" { + t.Fatalf("(-want, +got):\n%s", diff) + } + }) + } 
+} diff --git a/src/control/server/mgmt_svc.go b/src/control/server/mgmt_svc.go index 317a372a4e9..152c3353175 100644 --- a/src/control/server/mgmt_svc.go +++ b/src/control/server/mgmt_svc.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2018-2021 Intel Corporation. +// (C) Copyright 2018-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -28,7 +28,7 @@ type mgmtSvc struct { sysdb *system.Database rpcClient control.UnaryInvoker events *events.PubSub - clientNetworkHint *mgmtpb.ClientNetHint + clientNetworkHint []*mgmtpb.ClientNetHint joinReqs joinReqChan groupUpdateReqs chan bool lastMapVer uint32 @@ -42,7 +42,7 @@ func newMgmtSvc(h *EngineHarness, m *system.Membership, s *system.Database, c co sysdb: s, rpcClient: c, events: p, - clientNetworkHint: new(mgmtpb.ClientNetHint), + clientNetworkHint: []*mgmtpb.ClientNetHint{new(mgmtpb.ClientNetHint)}, joinReqs: make(joinReqChan), groupUpdateReqs: make(chan bool), } diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 84af73e6dfe..0eba0b7b87e 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -42,7 +42,7 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo if err := svc.checkReplicaRequest(req); err != nil { return nil, err } - if svc.clientNetworkHint == nil { + if len(svc.clientNetworkHint) == 0 { return nil, errors.New("clientNetworkHint is missing") } svc.log.Debugf("MgmtSvc.GetAttachInfo dispatch, req:%+v\n", *req) @@ -53,25 +53,45 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo } resp := new(mgmtpb.GetAttachInfoResp) - if req.GetAllRanks() { - for rank, uri := range groupMap.RankURIs { - resp.RankUris = append(resp.RankUris, &mgmtpb.GetAttachInfoResp_RankUri{ - Rank: rank.Uint32(), - Uri: uri, - }) - } - } else { + rankURIs := groupMap.RankURIs + if !req.GetAllRanks() { + rankURIs = make(map[system.Rank]system.URIs) + // If the request does not indicate 
that all ranks should be returned, // it may be from an older client, in which case we should just return // the MS ranks. for _, rank := range groupMap.MSRanks { - resp.RankUris = append(resp.RankUris, &mgmtpb.GetAttachInfoResp_RankUri{ - Rank: rank.Uint32(), - Uri: groupMap.RankURIs[rank], - }) + rankURIs[rank] = groupMap.RankURIs[rank] + } + } + + for rank, uris := range rankURIs { + if len(svc.clientNetworkHint) < len(uris.Secondary)+1 { + return nil, errors.Errorf("not enough client network hints (%d) for rank %d URIs (%d)", + len(svc.clientNetworkHint), rank, len(uris.Secondary)+1) + } + + resp.RankUris = append(resp.RankUris, &mgmtpb.GetAttachInfoResp_RankUri{ + Rank: rank.Uint32(), + Uri: uris.Primary, + Provider: svc.clientNetworkHint[0].Provider, + }) + + for i, uri := range uris.Secondary { + rankURI := &mgmtpb.GetAttachInfoResp_RankUri{ + Rank: rank.Uint32(), + Uri: uri, + Provider: svc.clientNetworkHint[i+1].Provider, // hint 0 is the primary provider; secondary URI i pairs with hint i+1 + } + + resp.SecondaryRankUris = append(resp.SecondaryRankUris, rankURI) } } - resp.ClientNetHint = svc.clientNetworkHint + + resp.ClientNetHint = svc.clientNetworkHint[0] + if len(svc.clientNetworkHint) > 1 { + resp.SecondaryClientNetHints = svc.clientNetworkHint[1:] + } resp.MsRanks = system.RanksToUint32(groupMap.MSRanks) // For resp.RankUris may be large, we make a resp copy with a limited @@ -259,13 +279,14 @@ func (svc *mgmtSvc) join(ctx context.Context, req *batchJoinRequest) *batchJoinR } joinResponse, err := svc.membership.Join(&system.JoinRequest{ - Rank: system.Rank(req.Rank), - UUID: uuid, - ControlAddr: req.peerAddr, - FabricURI: req.GetUri(), - FabricContexts: req.GetNctxs(), - FaultDomain: fd, - Incarnation: req.GetIncarnation(), + Rank: system.Rank(req.Rank), + UUID: uuid, + ControlAddr: req.peerAddr, + PrimaryFabricURI: req.GetUri(), + SecondaryFabricURIs: req.GetSecondaryUris(), + FabricContexts: req.GetNctxs(), + FaultDomain: fd, + Incarnation: req.GetIncarnation(), }) if err != nil { return &batchJoinResponse{joinErr:
err} @@ -273,11 +294,11 @@ func (svc *mgmtSvc) join(ctx context.Context, req *batchJoinRequest) *batchJoinR member := joinResponse.Member if joinResponse.Created { - svc.log.Debugf("new system member: rank %d, addr %s, uri %s", - member.Rank, req.peerAddr, member.FabricURI) + svc.log.Debugf("new system member: rank %d, addr %s, primary uri %s, secondary uris %s", + member.Rank, req.peerAddr, member.PrimaryFabricURI, member.SecondaryFabricURIs) } else { - svc.log.Debugf("updated system member: rank %d, uri %s, %s->%s", - member.Rank, member.FabricURI, joinResponse.PrevState, member.State()) + svc.log.Debugf("updated system member: rank %d, primary uri %s, secondary uris %s, %s->%s", + member.Rank, member.PrimaryFabricURI, member.SecondaryFabricURIs, joinResponse.PrevState, member.State()) } resp := &batchJoinResponse{ @@ -347,10 +368,10 @@ func (svc *mgmtSvc) doGroupUpdate(ctx context.Context, forced bool) error { MapVersion: gm.Version, } rankSet := &system.RankSet{} - for rank, uri := range gm.RankURIs { + for rank, uris := range gm.RankURIs { req.Engines = append(req.Engines, &mgmtpb.GroupUpdateReq_Engine{ Rank: rank.Uint32(), - Uri: uri, + Uri: uris.Primary, }) rankSet.Add(rank) } diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 251d64f0f76..186682302ad 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -103,12 +103,14 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.FabricURI, + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, + Provider: "ofi+verbs", }, { - Rank: nonReplica.Rank.Uint32(), - Uri: nonReplica.FabricURI, + Rank: nonReplica.Rank.Uint32(), + Uri: nonReplica.PrimaryFabricURI, + Provider: "ofi+verbs", }, }, MsRanks: []uint32{0}, @@ -134,12 +136,14 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: 
[]*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.FabricURI, + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, + Provider: "ofi+tcp", }, { - Rank: nonReplica.Rank.Uint32(), - Uri: nonReplica.FabricURI, + Rank: nonReplica.Rank.Uint32(), + Uri: nonReplica.PrimaryFabricURI, + Provider: "ofi+tcp", }, }, MsRanks: []uint32{0}, @@ -165,8 +169,9 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.FabricURI, + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, + Provider: "ofi+tcp", }, }, MsRanks: []uint32{0}, @@ -195,7 +200,7 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { if _, err := tc.svc.membership.Add(nonReplica); err != nil { t.Fatal(err) } - tc.svc.clientNetworkHint = tc.clientNetworkHint + tc.svc.clientNetworkHint = []*mgmtpb.ClientNetHint{tc.clientNetworkHint} gotResp, gotErr := tc.svc.GetAttachInfo(context.TODO(), tc.req) if gotErr != nil { t.Fatalf("unexpected error: %+v\n", gotErr) @@ -439,7 +444,7 @@ func mockMember(t *testing.T, r, a int32, s string) *system.Member { t.Fatalf("testcase specifies unknown member state %s", s) } - return system.NewMember(system.Rank(r), common.MockUUID(r), "", common.MockHostAddr(a), state) + return system.NewMember(system.Rank(r), common.MockUUID(r), []string{}, common.MockHostAddr(a), state) } func checkMembers(t *testing.T, exp system.Members, ms *system.Membership) { diff --git a/src/control/server/server.go b/src/control/server/server.go index 8f560c55dcc..74d2e636b5d 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -12,6 +12,7 @@ import ( "os" "os/signal" "os/user" + "strings" "sync" "syscall" "time" @@ -79,10 +80,20 @@ func processConfig(log *logging.LeveledLogger, cfg *config.Server, fis *hardware return faultDomain, nil } -func processFabricProvider(cfg *config.Server) { - if 
shouldAppendRXM(cfg.Fabric.Provider) { - cfg.WithFabricProvider(cfg.Fabric.Provider + ";ofi_rxm") +func processFabricProvider(cfg *config.Server) error { + providers, err := cfg.Fabric.GetProviders() + if err != nil { + return err } + + for i, p := range providers { + if shouldAppendRXM(p) { + providers[i] = p + ";ofi_rxm" + } + } + + cfg.WithFabricProvider(strings.Join(providers, engine.MultiProviderSeparator)) + return nil } func shouldAppendRXM(provider string) bool { @@ -102,7 +113,7 @@ type server struct { runningUser *user.User faultDomain *system.FaultDomain ctlAddr *net.TCPAddr - netDevClass hardware.NetDevClass + netDevClass []hardware.NetDevClass listener net.Listener harness *EngineHarness @@ -315,13 +326,24 @@ func (srv *server) setupGrpc() error { if err != nil { return err } - srv.mgmtSvc.clientNetworkHint = &mgmtpb.ClientNetHint{ - Provider: srv.cfg.Fabric.Provider, - CrtCtxShareAddr: srv.cfg.Fabric.CrtCtxShareAddr, - CrtTimeout: srv.cfg.Fabric.CrtTimeout, - NetDevClass: uint32(srv.netDevClass), - SrvSrxSet: srxSetting, + + providers, err := srv.cfg.Fabric.GetProviders() + if err != nil { + return err } + + clientNetHints := make([]*mgmtpb.ClientNetHint, 0, len(providers)) + for i, p := range providers { + clientNetHints = append(clientNetHints, &mgmtpb.ClientNetHint{ + Provider: p, + CrtCtxShareAddr: srv.cfg.Fabric.CrtCtxShareAddr, + CrtTimeout: srv.cfg.Fabric.CrtTimeout, + NetDevClass: uint32(srv.netDevClass[i]), + SrvSrxSet: srxSetting, + }) + } + srv.mgmtSvc.clientNetworkHint = clientNetHints + mgmtpb.RegisterMgmtSvcServer(srv.grpcServer, srv.mgmtSvc) tSec, err := security.DialOptionForTransportConfig(srv.cfg.TransportConfig) diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index ff180246330..1ee16269c9a 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -122,11 +122,31 @@ func updateFabricEnvars(log logging.Logger, cfg *engine.Config, fis *hardware.Fa // Mercury will 
now support the new OFI_DOMAIN environment variable so // that we can specify the correct device for each. if !cfg.HasEnvVar("OFI_DOMAIN") { - fi, err := fis.GetInterfaceOnOSDevice(cfg.Fabric.Interface, cfg.Fabric.Provider) + interfaces, err := cfg.Fabric.GetInterfaces() if err != nil { - return errors.Wrapf(err, "unable to determine device domain for %s", cfg.Fabric.Interface) + return err + } + + providers, err := cfg.Fabric.GetProviders() + if err != nil { + return err } - domain := fi.Name + + if len(providers) != len(interfaces) { + return errors.New("number of providers not equal to number of interfaces") + } + + domains := []string{} + + for i, p := range providers { + fi, err := fis.GetInterfaceOnOSDevice(interfaces[i], p) + if err != nil { + return errors.Wrapf(err, "unable to determine device domain for %s", interfaces[i]) + } + domains = append(domains, fi.Name) + } + + domain := strings.Join(domains, engine.MultiProviderSeparator) log.Debugf("setting OFI_DOMAIN=%s for %s", domain, cfg.Fabric.Interface) envVar := "OFI_DOMAIN=" + domain cfg.WithEnvVars(envVar) @@ -135,22 +155,29 @@ func updateFabricEnvars(log logging.Logger, cfg *engine.Config, fis *hardware.Fa return nil } -func getFabricNetDevClass(cfg *config.Server, fis *hardware.FabricInterfaceSet) (hardware.NetDevClass, error) { - var netDevClass hardware.NetDevClass +func getFabricNetDevClass(cfg *config.Server, fis *hardware.FabricInterfaceSet) ([]hardware.NetDevClass, error) { + netDevClass := []hardware.NetDevClass{} for index, engine := range cfg.Engines { - fi, err := fis.GetInterface(engine.Fabric.Interface) + cfgIfaces, err := engine.Fabric.GetInterfaces() if err != nil { - return 0, err + return nil, err } - ndc := fi.DeviceClass - if index == 0 { - netDevClass = ndc - continue - } - if ndc != netDevClass { - return 0, config.FaultConfigInvalidNetDevClass(index, netDevClass, - ndc, engine.Fabric.Interface) + for i, cfgIface := range cfgIfaces { + fi, err := fis.GetInterface(cfgIface) + if 
err != nil { + return nil, err + } + + ndc := fi.DeviceClass + if index == 0 { + netDevClass = append(netDevClass, ndc) + continue + } + if ndc != netDevClass[i] { + return nil, config.FaultConfigInvalidNetDevClass(index, netDevClass[i], + ndc, engine.Fabric.Interface) + } } } return netDevClass, nil diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 1c82ae87e92..efba16caa47 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -760,7 +760,7 @@ func TestServer_getNetDevClass(t *testing.T) { for name, tc := range map[string]struct { configA *engine.Config configB *engine.Config - expNetDevCls hardware.NetDevClass + expNetDevCls []hardware.NetDevClass expErr error }{ "successful validation with matching Infiniband": { @@ -768,14 +768,21 @@ func TestServer_getNetDevClass(t *testing.T) { WithFabricInterface("ib1"), configB: configB(). WithFabricInterface("ib0"), - expNetDevCls: hardware.Infiniband, + expNetDevCls: []hardware.NetDevClass{hardware.Infiniband}, }, "successful validation with matching Ethernet": { configA: configA(). WithFabricInterface("eth0"), configB: configB(). WithFabricInterface("eth1"), - expNetDevCls: hardware.Ether, + expNetDevCls: []hardware.NetDevClass{hardware.Ether}, + }, + "multi interface": { + configA: configA(). + WithFabricInterface("eth0 ib0"), + configB: configB(). + WithFabricInterface("eth1 ib1"), + expNetDevCls: []hardware.NetDevClass{hardware.Ether, hardware.Infiniband}, }, "mismatching net dev class with primary server as ib0 / Infiniband": { configA: configA(). diff --git a/src/control/system/database.go b/src/control/system/database.go index 08f406ac88e..2582770716b 100644 --- a/src/control/system/database.go +++ b/src/control/system/database.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -104,10 +104,16 @@ type ( SystemName string } + // URIs represents the set of URIs for a rank. + URIs struct { + Primary string + Secondary []string + } + // GroupMap represents a version of the system membership map. GroupMap struct { Version uint32 - RankURIs map[Rank]string + RankURIs map[Rank]URIs MSRanks []Rank } ) @@ -493,7 +499,7 @@ func (db *Database) IncMapVer() error { func newGroupMap(version uint32) *GroupMap { return &GroupMap{ Version: version, - RankURIs: make(map[Rank]string), + RankURIs: make(map[Rank]URIs), } } @@ -516,11 +522,16 @@ func (db *Database) GroupMap() (*GroupMap, error) { } // Quick sanity-check: Don't include members that somehow have // a nil rank or fabric URI, either. - if srv.Rank.Equals(NilRank) || srv.FabricURI == "" { - db.log.Errorf("member has invalid rank (%d) or URI (%s)", srv.Rank, srv.FabricURI) + if srv.Rank.Equals(NilRank) || srv.PrimaryFabricURI == "" { + db.log.Errorf("member has invalid rank (%d) or URIs (%s)", srv.Rank, + srv.PrimaryFabricURI) continue } - gm.RankURIs[srv.Rank] = srv.FabricURI + + gm.RankURIs[srv.Rank] = URIs{ + Primary: srv.PrimaryFabricURI, + Secondary: srv.SecondaryFabricURIs, + } if db.isReplica(srv.Addr) { gm.MSRanks = append(gm.MSRanks, srv.Rank) } diff --git a/src/control/system/database_test.go b/src/control/system/database_test.go index dc4c78bf359..292f83c4d58 100644 --- a/src/control/system/database_test.go +++ b/src/control/system/database_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -555,7 +555,7 @@ func TestSystem_Database_memberRaftOps(t *testing.T) { } func testMemberWithFaultDomain(rank Rank, fd *FaultDomain) *Member { - return NewMember(rank, uuid.New().String(), "dontcare", &net.TCPAddr{}, + return NewMember(rank, uuid.New().String(), []string{"dontcare"}, &net.TCPAddr{}, MemberStateJoined).WithFaultDomain(fd) } @@ -830,15 +830,15 @@ func TestSystem_Database_GroupMap(t *testing.T) { ), expGroupMap: &GroupMap{ Version: 11, - RankURIs: map[Rank]string{ - 0: mockControlAddr(t, 0).String(), - 2: mockControlAddr(t, 2).String(), - 3: mockControlAddr(t, 3).String(), - 4: mockControlAddr(t, 4).String(), - 5: mockControlAddr(t, 5).String(), - 6: mockControlAddr(t, 6).String(), - 9: mockControlAddr(t, 9).String(), - 10: mockControlAddr(t, 10).String(), + RankURIs: map[Rank]URIs{ + 0: {Primary: mockControlAddr(t, 0).String()}, + 2: {Primary: mockControlAddr(t, 2).String()}, + 3: {Primary: mockControlAddr(t, 3).String()}, + 4: {Primary: mockControlAddr(t, 4).String()}, + 5: {Primary: mockControlAddr(t, 5).String()}, + 6: {Primary: mockControlAddr(t, 6).String()}, + 9: {Primary: mockControlAddr(t, 9).String()}, + 10: {Primary: mockControlAddr(t, 10).String()}, }, }, }, @@ -846,21 +846,21 @@ func TestSystem_Database_GroupMap(t *testing.T) { members: membersWithStates(MemberStateJoined, MemberStateJoined), expGroupMap: &GroupMap{ Version: 2, - RankURIs: map[Rank]string{ - 0: mockControlAddr(t, 0).String(), - 1: mockControlAddr(t, 1).String(), + RankURIs: map[Rank]URIs{ + 0: {Primary: mockControlAddr(t, 0).String()}, + 1: {Primary: mockControlAddr(t, 1).String()}, }, MSRanks: []Rank{1}, }, }, "unset fabric URI skipped": { members: append([]*Member{ - NewMember(2, common.MockUUID(2), "", mockControlAddr(t, 2), MemberStateJoined), + NewMember(2, common.MockUUID(2), []string{}, mockControlAddr(t, 2), MemberStateJoined), }, membersWithStates(MemberStateJoined)...), expGroupMap: &GroupMap{ Version: 2, 
- RankURIs: map[Rank]string{ - 0: mockControlAddr(t, 0).String(), + RankURIs: map[Rank]URIs{ + 0: {Primary: mockControlAddr(t, 0).String()}, }, }, }, diff --git a/src/control/system/member.go b/src/control/system/member.go index 6189b8dab35..10a2cf0518b 100644 --- a/src/control/system/member.go +++ b/src/control/system/member.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2021 Intel Corporation. +// (C) Copyright 2019-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -159,16 +159,17 @@ func (ms MemberState) isTransitionIllegal(to MemberState) bool { // Member refers to a data-plane instance that is a member of this DAOS // system running on host with the control-plane listening at "Addr". type Member struct { - Rank Rank `json:"rank"` - Incarnation uint64 `json:"incarnation"` - UUID uuid.UUID `json:"uuid"` - Addr *net.TCPAddr `json:"addr"` - FabricURI string `json:"fabric_uri"` - FabricContexts uint32 `json:"fabric_contexts"` - state MemberState - Info string `json:"info"` - FaultDomain *FaultDomain `json:"fault_domain"` - LastUpdate time.Time `json:"last_update"` + Rank Rank `json:"rank"` + Incarnation uint64 `json:"incarnation"` + UUID uuid.UUID `json:"uuid"` + Addr *net.TCPAddr `json:"addr"` + PrimaryFabricURI string `json:"fabric_uri"` + SecondaryFabricURIs []string `json:"secondary_fabric_uris"` + FabricContexts uint32 `json:"fabric_contexts"` + state MemberState + Info string `json:"info"` + FaultDomain *FaultDomain `json:"fault_domain"` + LastUpdate time.Time `json:"last_update"` } // MarshalJSON marshals system.Member to JSON. @@ -253,14 +254,34 @@ func (sm *Member) WithFaultDomain(fd *FaultDomain) *Member { return sm } +// FabricURIs returns all fabric URIs, with the primary URI first. +func (sm *Member) FabricURIs() []string { + return append([]string{sm.PrimaryFabricURI}, sm.SecondaryFabricURIs...) +} + // NewMember returns a reference to a new member struct. 
-func NewMember(rank Rank, uuidStr, uri string, addr *net.TCPAddr, state MemberState) *Member { +func NewMember(rank Rank, uuidStr string, uris []string, addr *net.TCPAddr, state MemberState) *Member { // FIXME: Either require a valid uuid.UUID to be supplied // or else change the return signature to include an error newUUID := uuid.MustParse(uuidStr) - return &Member{Rank: rank, UUID: newUUID, FabricURI: uri, Addr: addr, - state: state, FaultDomain: MustCreateFaultDomain(), - LastUpdate: time.Now()} + + newMember := &Member{ + Rank: rank, + UUID: newUUID, + Addr: addr, + state: state, + FaultDomain: MustCreateFaultDomain(), + LastUpdate: time.Now(), + } + + if len(uris) > 0 { + newMember.PrimaryFabricURI = uris[0] + } + if len(uris) > 1 { + newMember.SecondaryFabricURIs = uris[1:] + } + + return newMember } // Members is a type alias for a slice of member references diff --git a/src/control/system/membership.go b/src/control/system/membership.go index ea637cf0c5d..7f0c3cfd701 100644 --- a/src/control/system/membership.go +++ b/src/control/system/membership.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -81,13 +81,14 @@ func (m *Membership) Count() (int, error) { // JoinRequest contains information needed for join membership update. type JoinRequest struct { - Rank Rank - UUID uuid.UUID - ControlAddr *net.TCPAddr - FabricURI string - FabricContexts uint32 - FaultDomain *FaultDomain - Incarnation uint64 + Rank Rank + UUID uuid.UUID + ControlAddr *net.TCPAddr + PrimaryFabricURI string + SecondaryFabricURIs []string + FabricContexts uint32 + FaultDomain *FaultDomain + Incarnation uint64 } // JoinResponse contains information returned from join membership update. 
@@ -104,6 +105,10 @@ func (m *Membership) Join(req *JoinRequest) (resp *JoinResponse, err error) { m.Lock() defer m.Unlock() + if req.PrimaryFabricURI == "" { + return nil, errors.New("no primary fabric URI in JoinRequest") + } + resp = new(JoinResponse) var curMember *Member if !req.Rank.Equals(NilRank) { @@ -143,7 +148,8 @@ func (m *Membership) Join(req *JoinRequest) (resp *JoinResponse, err error) { curMember.state = MemberStateJoined curMember.Info = "" curMember.Addr = req.ControlAddr - curMember.FabricURI = req.FabricURI + curMember.PrimaryFabricURI = req.PrimaryFabricURI + curMember.SecondaryFabricURIs = req.SecondaryFabricURIs curMember.FabricContexts = req.FabricContexts curMember.FaultDomain = req.FaultDomain curMember.Incarnation = req.Incarnation @@ -169,14 +175,15 @@ func (m *Membership) Join(req *JoinRequest) (resp *JoinResponse, err error) { } newMember := &Member{ - Rank: req.Rank, - Incarnation: req.Incarnation, - UUID: req.UUID, - Addr: req.ControlAddr, - FabricURI: req.FabricURI, - FabricContexts: req.FabricContexts, - FaultDomain: req.FaultDomain, - state: MemberStateJoined, + Rank: req.Rank, + Incarnation: req.Incarnation, + UUID: req.UUID, + Addr: req.ControlAddr, + PrimaryFabricURI: req.PrimaryFabricURI, + SecondaryFabricURIs: req.SecondaryFabricURIs, + FabricContexts: req.FabricContexts, + FaultDomain: req.FaultDomain, + state: MemberStateJoined, } if err := m.db.AddMember(newMember); err != nil { return nil, errors.Wrap(err, "failed to add new member") diff --git a/src/control/system/membership_test.go b/src/control/system/membership_test.go index bb6c5cb3320..379686a153d 100644 --- a/src/control/system/membership_test.go +++ b/src/control/system/membership_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -248,7 +248,7 @@ func TestSystem_Membership_HostRanks(t *testing.T) { MockMember(t, 1, MemberStateJoined), MockMember(t, 2, MemberStateStopped), MockMember(t, 3, MemberStateExcluded), - NewMember(Rank(4), MockUUID(4), addr1.String(), addr1, MemberStateStopped), // second host rank + NewMember(Rank(4), MockUUID(4), []string{addr1.String()}, addr1, MemberStateStopped), // second host rank } for name, tc := range map[string]struct { @@ -340,7 +340,7 @@ func TestSystem_Membership_CheckRanklist(t *testing.T) { MockMember(t, 1, MemberStateJoined), MockMember(t, 2, MemberStateStopped), MockMember(t, 3, MemberStateExcluded), - NewMember(Rank(4), common.MockUUID(4), "", addr1, MemberStateStopped), // second host rank + NewMember(Rank(4), common.MockUUID(4), []string{}, addr1, MemberStateStopped), // second host rank } for name, tc := range map[string]struct { @@ -430,7 +430,7 @@ func TestSystem_Membership_CheckHostlist(t *testing.T) { MockMember(t, 3, MemberStateExcluded), MockMember(t, 4, MemberStateJoined), MockMember(t, 5, MemberStateJoined), - NewMember(Rank(6), common.MockUUID(6), "", addr1, MemberStateStopped), // second host rank + NewMember(Rank(6), common.MockUUID(6), []string{}, addr1, MemberStateStopped), // second host rank } for name, tc := range map[string]struct { @@ -699,17 +699,18 @@ func TestSystem_Membership_Join(t *testing.T) { "not leader": { notLeader: true, req: &JoinRequest{ - FaultDomain: fd1, + PrimaryFabricURI: curMember.Addr.String(), + FaultDomain: fd1, }, expErr: errors.New("leader"), }, "successful rejoin": { req: &JoinRequest{ - Rank: curMember.Rank, - UUID: curMember.UUID, - ControlAddr: curMember.Addr, - FabricURI: curMember.Addr.String(), - FaultDomain: curMember.FaultDomain, + Rank: curMember.Rank, + UUID: curMember.UUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FaultDomain: curMember.FaultDomain, }, expResp: &JoinResponse{ Member: curMember, @@ 
-719,11 +720,11 @@ func TestSystem_Membership_Join(t *testing.T) { }, "successful rejoin with different fault domain": { req: &JoinRequest{ - Rank: curMember.Rank, - UUID: curMember.UUID, - ControlAddr: curMember.Addr, - FabricURI: curMember.Addr.String(), - FaultDomain: fd2, + Rank: curMember.Rank, + UUID: curMember.UUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FaultDomain: fd2, }, expResp: &JoinResponse{ Member: MockMember(t, 0, MemberStateJoined).WithFaultDomain(fd2), @@ -733,42 +734,42 @@ func TestSystem_Membership_Join(t *testing.T) { }, "rejoin with existing UUID and unknown rank": { req: &JoinRequest{ - Rank: Rank(42), - UUID: curMember.UUID, - ControlAddr: curMember.Addr, - FabricURI: curMember.Addr.String(), - FaultDomain: curMember.FaultDomain, + Rank: Rank(42), + UUID: curMember.UUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FaultDomain: curMember.FaultDomain, }, expErr: errUuidExists(curMember.UUID), }, "rejoin with existing UUID and nil rank": { req: &JoinRequest{ - Rank: NilRank, - UUID: curMember.UUID, - ControlAddr: curMember.Addr, - FabricURI: curMember.Addr.String(), - FaultDomain: curMember.FaultDomain, + Rank: NilRank, + UUID: curMember.UUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FaultDomain: curMember.FaultDomain, }, expErr: errRankChanged(NilRank, curMember.Rank, curMember.UUID), }, "rejoin with different UUID and dupe rank": { req: &JoinRequest{ - Rank: curMember.Rank, - UUID: newUUID, - ControlAddr: curMember.Addr, - FabricURI: curMember.Addr.String(), - FaultDomain: curMember.FaultDomain, + Rank: curMember.Rank, + UUID: newUUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FaultDomain: curMember.FaultDomain, }, expErr: errUuidChanged(newUUID, curMember.UUID, curMember.Rank), }, "successful join": { req: &JoinRequest{ - Rank: NilRank, - UUID: newMember.UUID, - ControlAddr: newMember.Addr, - 
FabricURI: newMember.FabricURI, - FabricContexts: newMember.FabricContexts, - FaultDomain: newMember.FaultDomain, + Rank: NilRank, + UUID: newMember.UUID, + ControlAddr: newMember.Addr, + PrimaryFabricURI: newMember.Addr.String(), + FabricContexts: newMember.FabricContexts, + FaultDomain: newMember.FaultDomain, }, expResp: &JoinResponse{ Created: true, @@ -779,23 +780,23 @@ func TestSystem_Membership_Join(t *testing.T) { }, "new member with bad fault domain depth": { req: &JoinRequest{ - Rank: NilRank, - UUID: newMemberShallowFD.UUID, - ControlAddr: newMemberShallowFD.Addr, - FabricURI: newMemberShallowFD.FabricURI, - FabricContexts: newMemberShallowFD.FabricContexts, - FaultDomain: newMemberShallowFD.FaultDomain, + Rank: NilRank, + UUID: newMemberShallowFD.UUID, + ControlAddr: newMemberShallowFD.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FabricContexts: newMemberShallowFD.FabricContexts, + FaultDomain: newMemberShallowFD.FaultDomain, }, expErr: FaultBadFaultDomainDepth(newMemberShallowFD.FaultDomain, curMember.FaultDomain.NumLevels()), }, "update existing member with bad fault domain depth": { req: &JoinRequest{ - Rank: curMember.Rank, - UUID: curMember.UUID, - ControlAddr: curMember.Addr, - FabricURI: curMember.FabricURI, - FabricContexts: curMember.FabricContexts, - FaultDomain: shallowFD, + Rank: curMember.Rank, + UUID: curMember.UUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FabricContexts: curMember.FabricContexts, + FaultDomain: shallowFD, }, expErr: FaultBadFaultDomainDepth(newMemberShallowFD.FaultDomain, curMember.FaultDomain.NumLevels()), }, @@ -804,11 +805,11 @@ func TestSystem_Membership_Join(t *testing.T) { curMember, }, req: &JoinRequest{ - Rank: curMember.Rank, - UUID: curMember.UUID, - ControlAddr: curMember.Addr, - FabricURI: curMember.Addr.String(), - FaultDomain: shallowFD, + Rank: curMember.Rank, + UUID: curMember.UUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + 
FaultDomain: shallowFD, }, expResp: &JoinResponse{ Member: MockMember(t, 0, MemberStateJoined).WithFaultDomain(shallowFD), diff --git a/src/control/system/mocks.go b/src/control/system/mocks.go index 0b2fa512f13..ef16f73d750 100644 --- a/src/control/system/mocks.go +++ b/src/control/system/mocks.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -32,7 +32,7 @@ func mockControlAddr(t *testing.T, idx uint32) *net.TCPAddr { func MockMember(t *testing.T, idx uint32, state MemberState, info ...string) *Member { addr := mockControlAddr(t, idx) m := NewMember(Rank(idx), common.MockUUID(int32(idx)), - addr.String(), addr, state) + []string{addr.String()}, addr, state) m.FabricContexts = idx if len(info) > 0 { m.Info = info[0] diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index 09bab1f00b8..a2fc1458ce2 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -827,7 +827,7 @@ const ProtobufCMessageDescriptor mgmt__group_update_resp__descriptor = (ProtobufCMessageInit) mgmt__group_update_resp__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[9] = +static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[10] = { { "sys", @@ -937,6 +937,18 @@ static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[9] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "secondary_uris", + 10, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Mgmt__JoinReq, n_secondary_uris), + offsetof(Mgmt__JoinReq, secondary_uris), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__join_req__field_indices_by_name[] = { 5, /* field[5] = addr */ @@ -944,6 +956,7 @@ static const unsigned mgmt__join_req__field_indices_by_name[] = { 8, /* field[8] = incarnation */ 4, /* 
field[4] = nctxs */ 2, /* field[2] = rank */ + 9, /* field[9] = secondary_uris */ 6, /* field[6] = srvFaultDomain */ 0, /* field[0] = sys */ 3, /* field[3] = uri */ @@ -952,7 +965,7 @@ static const unsigned mgmt__join_req__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__join_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 9 } + { 0, 10 } }; const ProtobufCMessageDescriptor mgmt__join_req__descriptor = { @@ -962,7 +975,7 @@ const ProtobufCMessageDescriptor mgmt__join_req__descriptor = "Mgmt__JoinReq", "mgmt", sizeof(Mgmt__JoinReq), - 9, + 10, mgmt__join_req__field_descriptors, mgmt__join_req__field_indices_by_name, 1, mgmt__join_req__number_ranges, @@ -1343,7 +1356,7 @@ const ProtobufCMessageDescriptor mgmt__client_net_hint__descriptor = (ProtobufCMessageInit) mgmt__client_net_hint__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__field_descriptors[2] = +static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__field_descriptors[3] = { { "rank", @@ -1369,15 +1382,28 @@ static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__fiel 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "provider", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp__RankUri, provider), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__get_attach_info_resp__rank_uri__field_indices_by_name[] = { + 2, /* field[2] = provider */ 0, /* field[0] = rank */ 1, /* field[1] = uri */ }; static const ProtobufCIntRange mgmt__get_attach_info_resp__rank_uri__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 2 } + { 0, 3 } }; const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__rank_uri__descriptor = { @@ -1387,14 +1413,14 @@ const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__rank_uri__descripto 
"Mgmt__GetAttachInfoResp__RankUri", "mgmt", sizeof(Mgmt__GetAttachInfoResp__RankUri), - 2, + 3, mgmt__get_attach_info_resp__rank_uri__field_descriptors, mgmt__get_attach_info_resp__rank_uri__field_indices_by_name, 1, mgmt__get_attach_info_resp__rank_uri__number_ranges, (ProtobufCMessageInit) mgmt__get_attach_info_resp__rank_uri__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descriptors[4] = +static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descriptors[6] = { { "status", @@ -1444,17 +1470,43 @@ static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descript 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "secondary_rank_uris", + 5, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_secondary_rank_uris), + offsetof(Mgmt__GetAttachInfoResp, secondary_rank_uris), + &mgmt__get_attach_info_resp__rank_uri__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "secondary_client_net_hints", + 6, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_secondary_client_net_hints), + offsetof(Mgmt__GetAttachInfoResp, secondary_client_net_hints), + &mgmt__client_net_hint__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__get_attach_info_resp__field_indices_by_name[] = { 3, /* field[3] = client_net_hint */ 2, /* field[2] = ms_ranks */ 1, /* field[1] = rank_uris */ + 5, /* field[5] = secondary_client_net_hints */ + 4, /* field[4] = secondary_rank_uris */ 0, /* field[0] = status */ }; static const ProtobufCIntRange mgmt__get_attach_info_resp__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 4 } + { 0, 6 } }; const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__descriptor = { @@ -1464,7 +1516,7 @@ const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__descriptor 
= "Mgmt__GetAttachInfoResp", "mgmt", sizeof(Mgmt__GetAttachInfoResp), - 4, + 6, mgmt__get_attach_info_resp__field_descriptors, mgmt__get_attach_info_resp__field_indices_by_name, 1, mgmt__get_attach_info_resp__number_ranges, diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 074a75bb1aa..2ad92eb134c 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -69,6 +69,9 @@ struct _Mgmt__GroupUpdateReq__Engine { ProtobufCMessage base; uint32_t rank; + /* + * primary URI is the only one group update is concerned with + */ char *uri; }; #define MGMT__GROUP_UPDATE_REQ__ENGINE__INIT \ @@ -114,7 +117,7 @@ struct _Mgmt__JoinReq */ uint32_t rank; /* - * Server CaRT base URI (i.e., for context 0). + * Server CaRT primary provider URI (i.e., for context 0). */ char *uri; /* @@ -137,10 +140,15 @@ struct _Mgmt__JoinReq * rank incarnation */ uint64_t incarnation; + /* + * URIs for any secondary providers + */ + size_t n_secondary_uris; + char **secondary_uris; }; #define MGMT__JOIN_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_req__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0 } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0,NULL } struct _Mgmt__JoinResp @@ -259,10 +267,11 @@ struct _Mgmt__GetAttachInfoResp__RankUri ProtobufCMessage base; uint32_t rank; char *uri; + char *provider; }; #define MGMT__GET_ATTACH_INFO_RESP__RANK_URI__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__get_attach_info_resp__rank_uri__descriptor) \ - , 0, (char *)protobuf_c_empty_string } + , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } struct _Mgmt__GetAttachInfoResp @@ -273,7 +282,7 @@ struct _Mgmt__GetAttachInfoResp */ int32_t status; /* - * Rank URIs + * Rank URIs for the primary provider 
*/ size_t n_rank_uris; Mgmt__GetAttachInfoResp__RankUri **rank_uris; @@ -286,11 +295,24 @@ struct _Mgmt__GetAttachInfoResp */ size_t n_ms_ranks; uint32_t *ms_ranks; + /* + * Primary provider hint + */ Mgmt__ClientNetHint *client_net_hint; + /* + * Rank URIs for additional providers + */ + size_t n_secondary_rank_uris; + Mgmt__GetAttachInfoResp__RankUri **secondary_rank_uris; + /* + * Hints for additional providers + */ + size_t n_secondary_client_net_hints; + Mgmt__ClientNetHint **secondary_client_net_hints; }; #define MGMT__GET_ATTACH_INFO_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__get_attach_info_resp__descriptor) \ - , 0, 0,NULL, 0,NULL, NULL } + , 0, 0,NULL, 0,NULL, NULL, 0,NULL, 0,NULL } struct _Mgmt__PrepShutdownReq diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 9da2a2992de..cc35f5e2305 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -1,5 +1,5 @@ // -// (C) Copyright 2018-2021 Intel Corporation. +// (C) Copyright 2018-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -20,7 +20,7 @@ message DaosResp { message GroupUpdateReq { message Engine { uint32 rank = 1; - string uri = 2; + string uri = 2; // primary URI is the only one group update is concerned with } uint32 map_version = 1; repeated Engine engines = 2; @@ -31,15 +31,16 @@ message GroupUpdateResp { } message JoinReq { - string sys = 1; // DAOS system name. - string uuid = 2; // Server UUID. - uint32 rank = 3; // Server rank desired, if not MAX_UINT32. - string uri = 4; // Server CaRT base URI (i.e., for context 0). - uint32 nctxs = 5; // Server CaRT context count. - string addr = 6; // Server management address. - string srvFaultDomain = 7; // Fault domain for this instance's server - uint32 idx = 8; // Instance index on server node. - uint64 incarnation = 9; // rank incarnation + string sys = 1; // DAOS system name. + string uuid = 2; // Server UUID. + uint32 rank = 3; // Server rank desired, if not MAX_UINT32. 
+ string uri = 4; // Server CaRT primary provider URI (i.e., for context 0). + uint32 nctxs = 5; // Server CaRT context count. + string addr = 6; // Server management address. + string srvFaultDomain = 7; // Fault domain for this instance's server + uint32 idx = 8; // Instance index on server node. + uint64 incarnation = 9; // rank incarnation + repeated string secondary_uris = 10; // URIs for any secondary providers } message JoinResp { @@ -84,12 +85,15 @@ message GetAttachInfoResp { message RankUri { uint32 rank = 1; string uri = 2; + string provider = 3; } - repeated RankUri rank_uris = 2; // Rank URIs + repeated RankUri rank_uris = 2; // Rank URIs for the primary provider // These CaRT settings are shared with the // libdaos client to aid in CaRT initialization. repeated uint32 ms_ranks = 3; // Ranks local to MS replicas - ClientNetHint client_net_hint = 4; + ClientNetHint client_net_hint = 4; // Primary provider hint + repeated RankUri secondary_rank_uris = 5; // Rank URIs for additional providers + repeated ClientNetHint secondary_client_net_hints = 6; // Hints for additional providers } message PrepShutdownReq { diff --git a/src/proto/mgmt/system.proto b/src/proto/mgmt/system.proto index 84ea1c7505e..8557a2f1bf2 100644 --- a/src/proto/mgmt/system.proto +++ b/src/proto/mgmt/system.proto @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2021 Intel Corporation. +// (C) Copyright 2019-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -28,6 +28,7 @@ message SystemMember { string info = 8; string fault_domain = 9; string last_update = 10; + repeated string secondary_fabric_uris = 11; } // SystemStopReq supplies system shutdown parameters. 
From 7ddb133bbace1494cd505d4c07735de1a5e63be5 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Tue, 15 Mar 2022 08:57:19 -0600 Subject: [PATCH 02/28] DAOS-9623 agent: Support configurable provider (#8452) This patch allows the agent to support secondary providers if any are returned by the server in the GetAttachInfo call. The client requires no knowledge of primary and secondary providers. The agent decides which provider to return based on its config file. - Add support for secondary providers to GetAttachInfo messages. - Add optional "provider" parameter for the agent configuration. If specified the agent will search secondary providers for the requested provider, and return the appropriate network info to the client as if it was the primary provider. Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/config.go | 1 + src/control/cmd/daos_agent/config_test.go | 2 + src/control/cmd/daos_agent/mgmt_rpc.go | 42 ++- src/control/cmd/daos_agent/mgmt_rpc_test.go | 287 ++++++++++++++++++-- src/control/cmd/daos_agent/start.go | 1 + src/control/lib/control/network.go | 13 +- utils/config/daos_agent.yml | 5 + 7 files changed, 322 insertions(+), 29 deletions(-) diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index d137503c22c..ea4313ab7c3 100644 --- a/src/control/cmd/daos_agent/config.go +++ b/src/control/cmd/daos_agent/config.go @@ -34,6 +34,7 @@ type Config struct { LogLevel common.ControlLogLevel `yaml:"control_log_mask,omitempty"` TransportConfig *security.TransportConfig `yaml:"transport_config"` FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"` + Provider string `yaml:"provider,omitempty"` } // NUMAFabricConfig defines a list of fabric interfaces that belong to a NUMA diff --git a/src/control/cmd/daos_agent/config_test.go b/src/control/cmd/daos_agent/config_test.go index 7412d3f0016..939e5d039bc 100644 --- a/src/control/cmd/daos_agent/config_test.go +++ b/src/control/cmd/daos_agent/config_test.go @@ 
-62,6 +62,7 @@ fabric_ifaces: - iface: ib3 domain: mlx5_3 +provider: ofi+tcp `) badLogMaskCfg := common.CreateTestFile(t, dir, ` @@ -155,6 +156,7 @@ transport_config: }, }, }, + Provider: "ofi+tcp", }, }, } { diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index f59cb29cbe0..e14b6cc7e3c 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -36,6 +36,7 @@ type mgmtModule struct { numaAware bool netCtx context.Context monitor *procMon + provider string } func (mod *mgmtModule) HandleCall(session *drpc.Session, method drpc.Method, req []byte) ([]byte, error) { @@ -131,13 +132,19 @@ func (mod *mgmtModule) handleGetAttachInfo(ctx context.Context, reqb []byte, pid } func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) { - resp, err := mod.getAttachInfoResp(ctx, numaNode, sys) + rawResp, err := mod.getAttachInfoResp(ctx, numaNode, sys) if err != nil { mod.log.Errorf("failed to fetch remote AttachInfo: %s", err.Error()) return nil, err } - fabricIF, err := mod.getFabricInterface(ctx, numaNode, hardware.NetDevClass(resp.ClientNetHint.NetDevClass), resp.ClientNetHint.Provider) + resp, err := mod.getProviderAttachInfo(rawResp) + if err != nil { + return nil, err + } + + fabricIF, err := mod.getFabricInterface(ctx, numaNode, hardware.NetDevClass(resp.ClientNetHint.NetDevClass), + resp.ClientNetHint.Provider) if err != nil { mod.log.Errorf("failed to fetch fabric interface of type %s: %s", hardware.NetDevClass(resp.ClientNetHint.NetDevClass), err.Error()) @@ -159,6 +166,37 @@ func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, numaNode int, sys return mod.attachInfo.Get(ctx, numaNode, sys, mod.getAttachInfoRemote) } +func (mod *mgmtModule) getProviderAttachInfo(srvResp *mgmtpb.GetAttachInfoResp) (*mgmtpb.GetAttachInfoResp, error) { + if mod.provider == "" || mod.provider == srvResp.ClientNetHint.Provider { + 
return srvResp, nil + } + + uris := []*mgmtpb.GetAttachInfoResp_RankUri{} + for _, uri := range srvResp.SecondaryRankUris { + if uri.Provider == mod.provider { + uris = append(uris, uri) + } + } + + if len(uris) == 0 { + return nil, errors.Errorf("no rank URIs for provider %q", mod.provider) + } + + for _, hint := range srvResp.SecondaryClientNetHints { + if hint.Provider == mod.provider { + + return &mgmtpb.GetAttachInfoResp{ + Status: srvResp.Status, + RankUris: uris, + MsRanks: srvResp.MsRanks, + ClientNetHint: hint, + }, nil + } + } + + return nil, errors.Errorf("no ClientNetHint for provider %q", mod.provider) +} + func (mod *mgmtModule) getAttachInfoRemote(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) { // Ask the MS for _all_ info, regardless of pbReq.AllRanks, so that the // cache can serve future "pbReq.AllRanks == true" requests. diff --git a/src/control/cmd/daos_agent/mgmt_rpc_test.go b/src/control/cmd/daos_agent/mgmt_rpc_test.go index 626817ad25e..3f4dfef38b7 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc_test.go +++ b/src/control/cmd/daos_agent/mgmt_rpc_test.go @@ -22,7 +22,254 @@ import ( "github.com/daos-stack/daos/src/control/logging" ) +func hostResps(resps ...*mgmtpb.GetAttachInfoResp) []*control.HostResponse { + result := []*control.HostResponse{} + for _, r := range resps { + result = append(result, &control.HostResponse{ + Message: r, + }) + } + return result +} + func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { + testSrvResp := func() *mgmtpb.GetAttachInfoResp { + return &mgmtpb.GetAttachInfoResp{ + RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ + { + Rank: 0, + Uri: "uri0", + Provider: "ofi+verbs", + }, + { + Rank: 1, + Uri: "uri1", + Provider: "ofi+verbs", + }, + { + Rank: 3, + Uri: "uri3", + Provider: "ofi+verbs", + }, + }, + SecondaryRankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ + { + Rank: 0, + Uri: "uri4-sec", + Provider: "ofi+sockets", + }, + { + Rank: 1, + Uri: "uri5-sec", + Provider: 
"ofi+sockets", + }, + { + Rank: 3, + Uri: "uri6-sec", + Provider: "ofi+sockets", + }, + { + Rank: 0, + Uri: "uri0-sec", + Provider: "ofi+tcp", + }, + { + Rank: 1, + Uri: "uri1-sec", + Provider: "ofi+tcp", + }, + { + Rank: 3, + Uri: "uri3-sec", + Provider: "ofi+tcp", + }, + }, + MsRanks: []uint32{0, 1, 3}, + ClientNetHint: &mgmtpb.ClientNetHint{ + Provider: "ofi+verbs", + NetDevClass: uint32(hardware.Infiniband), + }, + SecondaryClientNetHints: []*mgmtpb.ClientNetHint{ + { + Provider: "ofi+tcp", + NetDevClass: uint32(hardware.Ether), + }, + }, + } + } + + hintResp := func(fi, domain string) *mgmtpb.GetAttachInfoResp { + withHint := testSrvResp() + withHint.ClientNetHint.Interface = fi + withHint.ClientNetHint.Domain = domain + + return withHint + } + + for name, tc := range map[string]struct { + provider string + numaNode int + rpcResp *control.HostResponse + expResp *mgmtpb.GetAttachInfoResp + expErr error + }{ + "RPC error": { + rpcResp: &control.HostResponse{ + Error: errors.New("mock RPC"), + }, + expErr: errors.New("mock RPC"), + }, + "no provider hint": { + rpcResp: &control.HostResponse{ + Message: &mgmtpb.GetAttachInfoResp{ + RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ + { + Rank: 0, + Uri: "uri0", + Provider: "ofi+verbs", + }, + }, + MsRanks: []uint32{0}, + ClientNetHint: &mgmtpb.ClientNetHint{ + NetDevClass: uint32(hardware.Infiniband), + }, + }, + }, + expErr: errors.New("no provider"), + }, + "no provider match": { + rpcResp: &control.HostResponse{ + Message: &mgmtpb.GetAttachInfoResp{ + RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ + { + Rank: 0, + Uri: "uri0", + Provider: "notreal", + }, + }, + MsRanks: []uint32{0}, + ClientNetHint: &mgmtpb.ClientNetHint{ + Provider: "notreal", + NetDevClass: uint32(hardware.Infiniband), + }, + }, + }, + expErr: errors.New("no suitable fabric interface"), + }, + "basic success": { + + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: hintResp("fi0", "d0"), + }, + "primary provider": { + 
provider: "ofi+verbs", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: hintResp("fi0", "d0"), + }, + "secondary provider": { + provider: "ofi+tcp", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: &mgmtpb.GetAttachInfoResp{ + RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ + { + Rank: 0, + Uri: "uri0-sec", + Provider: "ofi+tcp", + }, + { + Rank: 1, + Uri: "uri1-sec", + Provider: "ofi+tcp", + }, + { + Rank: 3, + Uri: "uri3-sec", + Provider: "ofi+tcp", + }, + }, + MsRanks: []uint32{0, 1, 3}, + ClientNetHint: &mgmtpb.ClientNetHint{ + Provider: "ofi+tcp", + NetDevClass: uint32(hardware.Ether), + Interface: "fi1", + Domain: "fi1", + }, + }, + }, + "config provider not found": { + provider: "notreal", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expErr: errors.New("no rank URIs for provider"), + }, + "config provider hint missing": { + provider: "ofi+sockets", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expErr: errors.New("no ClientNetHint for provider"), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer common.ShowBufferOnFailure(t, buf) + + testFabric := &NUMAFabric{ + log: log, + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "fi0", + Domain: "d0", + NetDevClass: hardware.Infiniband, + Providers: []string{"ofi+verbs"}, + }, + { + Name: "fi1", + NetDevClass: hardware.Ether, + Providers: []string{"ofi+tcp"}, + }, + }, + }, + } + + sysName := "dontcare" + mod := &mgmtModule{ + log: log, + sys: sysName, + fabricInfo: newTestFabricCache(t, log, testFabric), + attachInfo: newAttachInfoCache(log, true), + ctlInvoker: control.NewMockInvoker(log, &control.MockInvokerConfig{ + Sys: sysName, + UnaryResponse: &control.UnaryResponse{ + Responses: []*control.HostResponse{tc.rpcResp}, + }, + }), + provider: tc.provider, + } + + resp, err := mod.getAttachInfo(context.Background(), tc.numaNode, sysName) + + common.CmpErr(t, 
tc.expErr, err) + if diff := cmp.Diff(tc.expResp, resp, cmpopts.IgnoreUnexported( + mgmtpb.GetAttachInfoResp{}, + mgmtpb.GetAttachInfoResp_RankUri{}, + mgmtpb.ClientNetHint{}, + )); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + }) + } +} + +func TestAgent_mgmtModule_getAttachInfo_cacheResp(t *testing.T) { testResps := []*mgmtpb.GetAttachInfoResp{ { MsRanks: []uint32{0, 1, 3}, @@ -47,18 +294,6 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { }, } - hostResps := func(resps []*mgmtpb.GetAttachInfoResp) []*control.HostResponse { - result := []*control.HostResponse{} - - for _, r := range resps { - result = append(result, &control.HostResponse{ - Message: r, - }) - } - - return result - } - testFI := &FabricInterface{ Name: "test0", Domain: "test0", @@ -68,7 +303,7 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { hintResp := func(resp *mgmtpb.GetAttachInfoResp) *mgmtpb.GetAttachInfoResp { withHint := new(mgmtpb.GetAttachInfoResp) - *withHint = *testResps[0] + *withHint = *resp withHint.ClientNetHint.Interface = testFI.Name withHint.ClientNetHint.Domain = testFI.Name @@ -77,12 +312,12 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { for name, tc := range map[string]struct { cacheDisabled bool - rpcResps []*control.HostResponse + rpcResps []*mgmtpb.GetAttachInfoResp expResps []*mgmtpb.GetAttachInfoResp }{ "cache disabled": { cacheDisabled: true, - rpcResps: hostResps(testResps), + rpcResps: testResps, expResps: []*mgmtpb.GetAttachInfoResp{ hintResp(testResps[0]), hintResp(testResps[1]), @@ -90,7 +325,7 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { }, }, "cached": { - rpcResps: hostResps(testResps), + rpcResps: testResps, expResps: []*mgmtpb.GetAttachInfoResp{ hintResp(testResps[0]), hintResp(testResps[0]), @@ -103,6 +338,19 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { defer common.ShowBufferOnFailure(t, buf) sysName := "dontcare" + mockInvokerCfg := &control.MockInvokerConfig{ + Sys: sysName, + 
UnaryResponseSet: []*control.UnaryResponse{}, + } + + for _, rpcResp := range tc.rpcResps { + mockInvokerCfg.UnaryResponseSet = append(mockInvokerCfg.UnaryResponseSet, + &control.UnaryResponse{ + Responses: hostResps(rpcResp), + }, + ) + } + mod := &mgmtModule{ log: log, sys: sysName, @@ -115,12 +363,7 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { }, }), attachInfo: newAttachInfoCache(log, !tc.cacheDisabled), - ctlInvoker: control.NewMockInvoker(log, &control.MockInvokerConfig{ - Sys: sysName, - UnaryResponse: &control.UnaryResponse{ - Responses: tc.rpcResps, - }, - }), + ctlInvoker: control.NewMockInvoker(log, mockInvokerCfg), } for _, expResp := range tc.expResps { diff --git a/src/control/cmd/daos_agent/start.go b/src/control/cmd/daos_agent/start.go index ff807d7b10b..e0503f1fe40 100644 --- a/src/control/cmd/daos_agent/start.go +++ b/src/control/cmd/daos_agent/start.go @@ -94,6 +94,7 @@ func (cmd *startCmd) Execute(_ []string) error { numaAware: numaAware, netCtx: netCtx, monitor: procmon, + provider: cmd.cfg.Provider, }) err = drpcServer.Start() diff --git a/src/control/lib/control/network.go b/src/control/lib/control/network.go index 7ee05b872fe..760a715c6da 100644 --- a/src/control/lib/control/network.go +++ b/src/control/lib/control/network.go @@ -207,8 +207,9 @@ type ( // PrimaryServiceRank provides a rank->uri mapping for a DAOS // Primary Service Rank (PSR). 
PrimaryServiceRank struct { - Rank uint32 - Uri string + Rank uint32 + Uri string + Provider string } ClientNetworkHint struct { @@ -224,9 +225,11 @@ type ( } GetAttachInfoResp struct { - ServiceRanks []*PrimaryServiceRank `json:"rank_uris"` - MSRanks []uint32 `json:"ms_ranks"` - ClientNetHint ClientNetworkHint `json:"client_net_hint"` + ServiceRanks []*PrimaryServiceRank `json:"rank_uris"` + AlternateServiceRanks []*PrimaryServiceRank `json:"secondary_rank_uris"` + MSRanks []uint32 `json:"ms_ranks"` + ClientNetHint ClientNetworkHint `json:"client_net_hint"` + AlternateClientNetHints []ClientNetworkHint `json:"secondary_client_net_hints"` } ) diff --git a/utils/config/daos_agent.yml b/utils/config/daos_agent.yml index fbc062e671d..fb89a30d217 100644 --- a/utils/config/daos_agent.yml +++ b/utils/config/daos_agent.yml @@ -84,3 +84,8 @@ # - # iface: ib3 # domain: mlx5_3 + +# Manually force a specific fabric provider to be used by all clients, in the event that the server +# supports multiple providers. +# +#provider: ofi+verbs From ffaa379730f4a7fceec885151697e545733c844a Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Tue, 22 Mar 2022 23:26:38 +0800 Subject: [PATCH 03/28] DAOS-9928 srv: xstreams for secondary cart contexts (#8413) * DAOS-9928 srv: xstreams for secondary cart contexts This patch adjusted xstream starting and core assignment code to support xstreams for secondary Cart contexts. 
It also has two minor fixes to the current code: - Oversubscribe doesn't work in NUMA mode, so fall through to legacy mode when oversubscribe is specified; - 'Core offset' should be ignored when NUMA mode is used; Signed-off-by: Niu Yawei --- src/engine/init.c | 20 +- src/engine/srv.c | 413 ++++++++++++++++++++++------- src/engine/srv_internal.h | 33 +-- src/include/daos/rpc.h | 2 +- src/include/daos_srv/daos_engine.h | 3 + 5 files changed, 338 insertions(+), 133 deletions(-) diff --git a/src/engine/init.c b/src/engine/init.c index b71535eec63..34f52d68b16 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -295,7 +295,6 @@ dss_topo_init() { int depth; int numa_node_nr; - int num_cores_visited; char *cpuset; int k; hwloc_obj_t corenode; @@ -310,9 +309,14 @@ dss_topo_init() numa_node_nr = hwloc_get_nbobjs_by_depth(dss_topo, depth); d_getenv_bool("DAOS_TARGET_OVERSUBSCRIBE", &tgt_oversub); - /* if no NUMA node was specified, or NUMA data unavailable */ - /* fall back to the legacy core allocation algorithm */ - if (dss_numa_node == -1 || numa_node_nr <= 0) { + /* + * Use legacy core allocation algorithm when: + * + * - No NUMA node was specified, or; + * - NUMA data unavailable, or; + * - Oversubscribe is specified (which isn't supported in NUMA mode); + */ + if (dss_numa_node == -1 || numa_node_nr <= 0 || tgt_oversub) { D_PRINT("Using legacy core allocation algorithm\n"); dss_tgt_nr = dss_tgt_nr_get(dss_core_nr, nr_threads, tgt_oversub); @@ -350,7 +354,6 @@ dss_topo_init() } dss_num_cores_numa_node = 0; - num_cores_visited = 0; for (k = 0; k < dss_core_nr; k++) { corenode = hwloc_get_obj_by_depth(dss_topo, dss_core_depth, k); @@ -358,11 +361,8 @@ dss_topo_init() continue; if (hwloc_bitmap_isincluded(corenode->cpuset, numa_obj->cpuset) != 0) { - if (num_cores_visited++ >= dss_core_offset) { - hwloc_bitmap_set(core_allocation_bitmap, k); - hwloc_bitmap_asprintf(&cpuset, - corenode->cpuset); - } + hwloc_bitmap_set(core_allocation_bitmap, k); + 
hwloc_bitmap_asprintf(&cpuset, corenode->cpuset); dss_num_cores_numa_node++; } } diff --git a/src/engine/srv.c b/src/engine/srv.c index 33338f5a6d9..44c6f5b40eb 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -70,6 +70,9 @@ /** Number of dRPC xstreams */ #define DRPC_XS_NR (1) + +/** Number of secondary cart context XS */ +unsigned int dss_sec_xs_nr; /** Number of offload XS */ unsigned int dss_tgt_offload_xs_nr; /** Number of target (XS set) per engine */ @@ -102,7 +105,9 @@ dss_ctx_nr_get(void) #define DSS_SYS_XS_NAME_FMT "daos_sys_%d" #define DSS_IO_XS_NAME_FMT "daos_io_%d" +#define DSS_IOFW_XS_NAME_FMT "daos_iofw_%d" #define DSS_OFFLOAD_XS_NAME_FMT "daos_off_%d" +#define DSS_SEC_XS_NAME_FMT "daos_sec_%d" struct dss_xstream_data { /** Initializing step, it is for cleanup of global states */ @@ -334,6 +339,71 @@ wait_all_exited(struct dss_xstream *dx) D_DEBUG(DB_TRACE, "XS(%d) drained ULTs.\n", dx->dx_xs_id); } +/* Get xstream type from xstream ID */ +static unsigned int +xs_id2type(unsigned int xs_id) +{ + unsigned int helper_per_tgt, xs_offset; + + D_ASSERTF(xs_id >= 0 && xs_id < DSS_XS_NR_TOTAL, + "invalid xs_id %d, dss_tgt_nr %d, dss_tgt_offload_xs_nr %d, dss_sec_xs_nr %d\n", + xs_id, dss_tgt_nr, dss_tgt_offload_xs_nr, dss_sec_xs_nr); + + if (xs_id == 0) + return DSS_XS_SYS; + else if (xs_id == 1) + return DSS_XS_SWIM; + else if (xs_id < dss_sys_xs_nr) + return DSS_XS_DRPC; + else if (xs_id >= dss_sys_xs_nr + dss_tgt_nr + dss_tgt_offload_xs_nr) + return DSS_XS_SEC; + + if (dss_helper_pool) { + if (xs_id < (dss_sys_xs_nr + dss_tgt_nr)) + return DSS_XS_VOS; + else if (xs_id < (dss_sys_xs_nr + 2 * dss_tgt_nr)) + return DSS_XS_IOFW; + else + return DSS_XS_OFFLOAD; + } + + helper_per_tgt = dss_tgt_offload_xs_nr / dss_tgt_nr; + D_ASSERT(helper_per_tgt == 0 || helper_per_tgt == 1 || helper_per_tgt == 2); + + xs_offset = (xs_id - dss_sys_xs_nr) % (helper_per_tgt + 1); + if (xs_offset == 0) + return DSS_XS_VOS; + else if (xs_offset == 1) + return DSS_XS_IOFW; 
+ else + return DSS_XS_OFFLOAD; +} + +/* Get target ID from xstream ID */ +static inline int +xs_id2tgt(int xs_id) +{ + unsigned int helper_per_tgt; + unsigned int xs_type = xs_id2type(xs_id); + + if (xs_type != DSS_XS_VOS && xs_type != DSS_XS_IOFW) + return -1; + + if (dss_helper_pool) + return xs_id - dss_sys_xs_nr; + + helper_per_tgt = dss_tgt_offload_xs_nr / dss_tgt_nr; + D_ASSERT(helper_per_tgt == 0 || helper_per_tgt == 1 || helper_per_tgt == 2); + + return (xs_id - dss_sys_xs_nr) / (helper_per_tgt + 1); +} + +static inline bool +has_crt_context(struct dss_xstream *dx) +{ + return dx->dx_comm || (xs_id2type(dx->dx_xs_id) == DSS_XS_SEC); +} + /* * The server handler ULT first sets CPU affinity, initialize the per-xstream * TLS, CRT(comm) context, NVMe context, creates the long-run ULTs (GC & NVMe @@ -348,6 +418,7 @@ dss_srv_handler(void *arg) struct dss_module_info *dmi; int rc; bool signal_caller = true; + unsigned int xs_type; rc = dss_xstream_set_affinity(dx); if (rc) @@ -371,7 +442,37 @@ dss_srv_handler(void *arg) (void)pthread_setname_np(pthread_self(), dx->dx_name); - if (dx->dx_comm) { + xs_type = xs_id2type(dx->dx_xs_id); + if (xs_type == DSS_XS_SEC) { + /* TODO: Create secondary cart context and register secondary RPC handler */ + D_ASSERTF(0, "Secondary cart context isn't supported\n"); +#if 0 + rc = crt_context_create_seconary(&dmi->dmi_ctxt); + if (rc != 0) { + D_ERROR("Failed to create secondary crt ctxt: "DF_RC"\n", DP_RC(rc)); + goto tls_fini; + } + + rc = crt_context_register_rpc_task(dmi->dmi_ctx, secondary_rpc_hdlr, NULL, dx); + if (rc != 0) { + D_ERROR("Failed to register secondary process cb "DF_RC"\n", DP_RC(rc)); + goto crt_destroy; + } + + rc = crt_context_idx(dmi->dmi_ctx, &dmi->dmi_ctx_id); + if (rc != 0) { + D_ERROR("Failed to get secondary context ID: rc "DF_RC"\n", DP_RC(rc)); + goto crt_destroy; + } + dx->dx_ctx_id = dmi->dmi_ctx_id; + + if (dx->dx_ctx_id != 0) { + D_ERROR("Invalid secondary context ID: %d\n", dx->dx_ctx_id); + rc = 
-DER_INVAL; + goto crt_destroy; + } +#endif + } else if (dx->dx_comm) { /* create private transport context */ rc = crt_context_create(&dmi->dmi_ctx); if (rc != 0) { @@ -401,7 +502,6 @@ dss_srv_handler(void *arg) /* * xs_id: 0 => SYS XS: ctx_id: 0 * xs_id: 1 => SWIM XS: ctx_id: 1 - * xs_id: 2 => DRPC XS: no ctx_id */ D_ASSERTF(dx->dx_ctx_id == dx->dx_xs_id, "incorrect ctx_id %d for xs_id %d\n", @@ -487,17 +587,17 @@ dss_srv_handler(void *arg) * respond to incoming pings. It is out of the scope of * dss_{thread,task}_collective. */ - if (dx->dx_xs_id != 1 /* DSS_XS_SWIM */) + if (xs_type != DSS_XS_SWIM) ABT_cond_wait(xstream_data.xd_ult_barrier, xstream_data.xd_mutex); ABT_mutex_unlock(xstream_data.xd_mutex); - if (dx->dx_comm) + if (has_crt_context(dx)) dx->dx_progress_started = true; signal_caller = false; /* main service progress loop */ for (;;) { - if (dx->dx_comm) { + if (has_crt_context(dx)) { rc = crt_progress(dmi->dmi_ctx, dx->dx_timeout); if (rc != 0 && rc != -DER_TIMEDOUT) { D_ERROR("failed to progress CART context: %d\n", @@ -514,7 +614,7 @@ dss_srv_handler(void *arg) ABT_thread_yield(); } - if (dx->dx_comm) + if (has_crt_context(dx)) dx->dx_progress_started = false; wait_all_exited(dx); @@ -528,7 +628,7 @@ dss_srv_handler(void *arg) tse_fini: tse_sched_fini(&dx->dx_sched_dsc); crt_destroy: - if (dx->dx_comm) + if (has_crt_context(dx)) crt_context_destroy(dmi->dmi_ctx, true); tls_fini: dss_tls_fini(dtc); @@ -618,73 +718,40 @@ dss_start_one_xstream(hwloc_cpuset_t cpus, int xs_id) struct dss_xstream *dx; ABT_thread_attr attr = ABT_THREAD_ATTR_NULL; int rc = 0; - bool comm; /* true to create cart ctx for RPC */ - int xs_offset = 0; + unsigned int xs_type = xs_id2type(xs_id); /** allocate & init xstream configuration data */ dx = dss_xstream_alloc(cpus); if (dx == NULL) return -DER_NOMEM; - /* Partial XS need the RPC communication ability - system XS, each - * main XS and its first offload XS (for IO dispatch). 
- * The 2nd offload XS(if exists) does not need RPC communication - * as it is only for EC/checksum/compress offloading. + /* + * SYS, SWIM, every main xstream (VOS) and its first offloading xstream + * (IOFW for IO forwarding) requires primary cart context. The 2nd offloading + * xstream (OFFLOAD for EC/csum/compress calculation) have no cart context. */ - if (dss_helper_pool) { - comm = (xs_id == 0) || /* DSS_XS_SYS */ - (xs_id == 1) || /* DSS_XS_SWIM */ - (xs_id >= dss_sys_xs_nr && - xs_id < (dss_sys_xs_nr + 2 * dss_tgt_nr)); - } else { - int helper_per_tgt; - - helper_per_tgt = dss_tgt_offload_xs_nr / dss_tgt_nr; - D_ASSERT(helper_per_tgt == 0 || - helper_per_tgt == 1 || - helper_per_tgt == 2); - - if ((xs_id >= dss_sys_xs_nr) && - (xs_id < (dss_sys_xs_nr + dss_tgt_nr + dss_tgt_offload_xs_nr))) - xs_offset = (xs_id - dss_sys_xs_nr) % (helper_per_tgt + 1); - else - xs_offset = -1; - - comm = (xs_id == 0) || /* DSS_XS_SYS */ - (xs_id == 1) || /* DSS_XS_SWIM */ - (xs_offset == 0) || /* main XS */ - (xs_offset == 1); /* first offload XS */ - } - dx->dx_xs_id = xs_id; dx->dx_ctx_id = -1; - dx->dx_comm = comm; - if (dss_helper_pool) { - dx->dx_main_xs = (xs_id >= dss_sys_xs_nr) && - (xs_id < (dss_sys_xs_nr + dss_tgt_nr)); - } else { - dx->dx_main_xs = (xs_id >= dss_sys_xs_nr) && (xs_offset == 0); - } + dx->dx_comm = (xs_type == DSS_XS_SYS) || (xs_type == DSS_XS_SWIM) || + (xs_type == DSS_XS_VOS) || (xs_type == DSS_XS_IOFW); + dx->dx_main_xs = (xs_type == DSS_XS_VOS); dx->dx_dsc_started = false; + dx->dx_tgt_id = xs_id2tgt(xs_id); /** * Generate name for each xstreams so that they can be easily identified * and monitored independently (e.g. 
via ps(1)) */ - dx->dx_tgt_id = dss_xs2tgt(xs_id); - if (xs_id < dss_sys_xs_nr) { - /** system xtreams are named daos_sys_$num */ - snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_SYS_XS_NAME_FMT, - xs_id); - } else if (dx->dx_main_xs) { - /** primary I/O xstreams are named daos_io_$tgtid */ - snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_IO_XS_NAME_FMT, - dx->dx_tgt_id); - } else { - /** offload xstreams are named daos_off_$num */ - snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_OFFLOAD_XS_NAME_FMT, - xs_id); - } + if (xs_id < dss_sys_xs_nr) + snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_SYS_XS_NAME_FMT, xs_id); + else if (xs_type == DSS_XS_VOS) + snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_IO_XS_NAME_FMT, dx->dx_tgt_id); + else if (xs_type == DSS_XS_IOFW) + snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_IOFW_XS_NAME_FMT, xs_id); + else if (xs_type == DSS_XS_OFFLOAD) + snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_OFFLOAD_XS_NAME_FMT, xs_id); + else + snprintf(dx->dx_name, DSS_XS_NAME_LEN, DSS_SEC_XS_NAME_FMT, xs_id); /** create ABT scheduler in charge of this xstream */ rc = dss_sched_init(dx); @@ -843,31 +910,149 @@ dss_xstream_is_busy(void) return cur_msec < (cntr->rc_active_time + 5000); } +struct core_assignment { + int *ca_assigned; /* Assigned core idx array */ + int ca_sub_off[DSS_XS_MAX]; /* Offset of ca_assigned for XS types */ + int ca_sub_nr[DSS_XS_MAX]; /* Number of assigned for XS types */ +}; + +static void +ca_assign(struct core_assignment *ca, unsigned int xs_type, unsigned idx) +{ + int sub_off, sub_nr; + + D_ASSERT(xs_type < DSS_XS_MAX); + + /* IOFW and OFFLOAD share the same sub-array */ + if (xs_type == DSS_XS_OFFLOAD) + xs_type = DSS_XS_IOFW; + sub_off = ca->ca_sub_off[xs_type]; + sub_nr = ca->ca_sub_nr[xs_type]; + + D_ASSERT(ca->ca_assigned[sub_off + sub_nr] == -1); + ca->ca_assigned[sub_off + sub_nr] = idx; + ca->ca_sub_nr[xs_type] += 1; +} + +static void +ca_free(struct core_assignment *ca) +{ + D_FREE(ca->ca_assigned); + D_FREE(ca); +} + +static struct 
core_assignment * +ca_alloc(unsigned int xs_nr) +{ + struct core_assignment *ca; + int i, off = 0; + + D_ALLOC_PTR(ca); + if (ca == NULL) + return NULL; + + D_ALLOC_ARRAY(ca->ca_assigned, xs_nr); + if (ca->ca_assigned == NULL) { + D_FREE(ca); + return NULL; + } + + for (i = 0; i < xs_nr; i++) + ca->ca_assigned[i] = -1; + + ca->ca_sub_off[DSS_XS_SYS] = off; + off += 1; + + ca->ca_sub_off[DSS_XS_SWIM] = off; + off += 1; + + ca->ca_sub_off[DSS_XS_DRPC] = off; + off += DRPC_XS_NR; + + ca->ca_sub_off[DSS_XS_VOS] = off; + off += dss_tgt_nr; + + ca->ca_sub_off[DSS_XS_IOFW] = off; + ca->ca_sub_off[DSS_XS_OFFLOAD] = off; + off += dss_tgt_offload_xs_nr; + + ca->ca_sub_off[DSS_XS_SEC] = off; + off += dss_sec_xs_nr; + D_ASSERT(off == xs_nr); + + return ca; +} + +/* Reuse a core form the cores assigned to 'assigned_type' xstream */ +static unsigned +ca_reuse_core(struct core_assignment *ca, unsigned int xs_id, unsigned int assigned_type) +{ + int sub_off, sub_nr, idx; + + sub_off = ca->ca_sub_off[assigned_type]; + sub_nr = ca->ca_sub_nr[assigned_type]; + + D_ASSERT(sub_nr > 0); + if (sub_nr > 1) + sub_off += (xs_id % sub_nr); + + idx = ca->ca_assigned[sub_off]; + D_ASSERT(idx != -1); + + return idx; +} + +/* There are not enough cores for DRPC and SEC xstreams */ +static inline bool +insufficient_cores() +{ + if (numa_obj) + return (dss_num_cores_numa_node < DSS_XS_NR_TOTAL); + else + return (dss_core_nr < DSS_XS_NR_TOTAL); +} + static int -dss_start_xs_id(int xs_id) +dss_start_xs_id(int xs_id, struct core_assignment *ca) { hwloc_obj_t obj; int rc; - int xs_core_offset; unsigned idx; + unsigned int xs_type; char *cpuset; + /* + * Rules for assigning cores to xstreams: + * + * - SYS always uses the first core; + * - SWIM always uses dedicated core (see dss_tgt_nr_get()); + * - DRPC uses dedicated core when possible, otherwise, share the SYS core; + * - VOS, IOFW and OFFLOAD xtreams always use dedicated cores; + * - SEC xstreams use dedicated cores when possible, otherwise, share 
cores + * with IOFW and OFFLOAD. + */ D_DEBUG(DB_TRACE, "start xs_id called for %d. ", xs_id); + + xs_type = xs_id2type(xs_id); /* if we are NUMA aware, use the NUMA information */ if (numa_obj) { - idx = hwloc_bitmap_first(core_allocation_bitmap); - if (idx == -1) { - D_ERROR("No core available for XS: %d", xs_id); - return -DER_INVAL; - } - D_DEBUG(DB_TRACE, - "Choosing next available core index %d.", idx); - /* - * All system XS will reuse the first XS' core, but - * the SWIM and DRPC XS will use separate core if enough cores - */ - if (xs_id > 1 || (xs_id == 0 && dss_core_nr > dss_tgt_nr)) + if (xs_type == DSS_XS_DRPC && insufficient_cores()) { + idx = ca_reuse_core(ca, xs_id, DSS_XS_SYS); + } else if (xs_type == DSS_XS_SEC && insufficient_cores()) { + if (dss_tgt_offload_xs_nr == 0) { + D_ERROR("No avail cores for secondary context\n"); + return -DER_INVAL; + } + idx = ca_reuse_core(ca, xs_id, DSS_XS_IOFW); + } else { + idx = hwloc_bitmap_first(core_allocation_bitmap); + if (idx == -1) { + D_ERROR("No core available for XS: %d", xs_id); + return -DER_INVAL; + } + D_DEBUG(DB_TRACE, "Choosing next available core index %d.", idx); hwloc_bitmap_clr(core_allocation_bitmap, idx); + } obj = hwloc_get_obj_by_depth(dss_topo, dss_core_depth, idx); if (obj == NULL) { @@ -880,25 +1065,52 @@ dss_start_xs_id(int xs_id) free(cpuset); } else { D_DEBUG(DB_TRACE, "Using non-NUMA aware core allocation\n"); - /* - * All system XS will use the first core, but - * the SWIM XS will use separate core if enough cores - */ - if (xs_id > 2) - xs_core_offset = xs_id - ((dss_core_nr > dss_tgt_nr) ? 1 : 2); - else if (xs_id == 1) - xs_core_offset = (dss_core_nr > dss_tgt_nr) ? 
1 : 0; - else - xs_core_offset = 0; - obj = hwloc_get_obj_by_depth(dss_topo, dss_core_depth, - (xs_core_offset + dss_core_offset) - % dss_core_nr); + /* MOD operation on 'dss_core_nr' to support oversubscribe, see dss_tgt_nr_get() */ + switch (xs_type) { + case DSS_XS_SYS: + idx = dss_core_offset % dss_core_nr; + break; + case DSS_XS_SWIM: + idx = (dss_core_offset + 1) % dss_core_nr; + break; + case DSS_XS_DRPC: + if (insufficient_cores()) + idx = dss_core_offset % dss_core_nr; + else + idx = (dss_core_offset + 2) % dss_core_nr; + break; + case DSS_XS_VOS: + case DSS_XS_IOFW: + case DSS_XS_OFFLOAD: + if (insufficient_cores()) + idx = (dss_core_offset + xs_id - 1) % dss_core_nr; + else + idx = (dss_core_offset + xs_id) % dss_core_nr; + break; + case DSS_XS_SEC: + if (insufficient_cores()) { + if (dss_tgt_offload_xs_nr == 0) { + D_ERROR("No avail cores for secondary context\n"); + return -DER_INVAL; + } + idx = ca_reuse_core(ca, xs_id, DSS_XS_IOFW); + } else { + idx = (dss_core_offset + xs_id) % dss_core_nr; + } + break; + default: + D_ASSERTF(0, "Invalid XS type: %u\n", xs_type); + return -DER_INVAL; + } + + obj = hwloc_get_obj_by_depth(dss_topo, dss_core_depth, idx); if (obj == NULL) { D_ERROR("Null core returned by hwloc for XS %d\n", xs_id); return -DER_INVAL; } } + ca_assign(ca, xs_type, idx); rc = dss_start_one_xstream(obj->cpuset, xs_id); if (rc) @@ -910,9 +1122,9 @@ dss_start_xs_id(int xs_id) static int dss_xstreams_init(void) { - char *env; - int rc = 0; - int i, xs_id; + struct core_assignment *ca; + char *env; + int i, xs_id, rc = 0; D_ASSERT(dss_tgt_nr >= 1); @@ -957,10 +1169,16 @@ dss_xstreams_init(void) } xstream_data.xd_xs_nr = DSS_XS_NR_TOTAL; + ca = ca_alloc(DSS_XS_NR_TOTAL); + if (ca == NULL) { + D_ERROR("Failed to alloc core assignment data\n"); + return -DER_NOMEM; + } + /* start system service XS */ for (i = 0; i < dss_sys_xs_nr; i++) { xs_id = i; - rc = dss_start_xs_id(xs_id); + rc = dss_start_xs_id(xs_id, ca); if (rc) D_GOTO(out, rc); } @@ 
-968,7 +1186,7 @@ dss_xstreams_init(void) /* start main IO service XS */ for (i = 0; i < dss_tgt_nr; i++) { xs_id = DSS_MAIN_XS_ID(i); - rc = dss_start_xs_id(xs_id); + rc = dss_start_xs_id(xs_id, ca); if (rc) D_GOTO(out, rc); } @@ -978,7 +1196,7 @@ dss_xstreams_init(void) if (dss_helper_pool) { for (i = 0; i < dss_tgt_offload_xs_nr; i++) { xs_id = dss_sys_xs_nr + dss_tgt_nr + i; - rc = dss_start_xs_id(xs_id); + rc = dss_start_xs_id(xs_id, ca); if (rc) D_GOTO(out, rc); } @@ -992,7 +1210,7 @@ dss_xstreams_init(void) for (j = 0; j < dss_tgt_offload_xs_nr / dss_tgt_nr; j++) { xs_id = DSS_MAIN_XS_ID(i) + j + 1; - rc = dss_start_xs_id(xs_id); + rc = dss_start_xs_id(xs_id, ca); if (rc) D_GOTO(out, rc); } @@ -1000,9 +1218,18 @@ dss_xstreams_init(void) } } + /* Start secondary cart context XS if any */ + for (i = 0; i < dss_sec_xs_nr; i++) { + xs_id = dss_sys_xs_nr + dss_tgt_nr + dss_tgt_offload_xs_nr + i; + rc = dss_start_xs_id(xs_id, ca); + if (rc) + D_GOTO(out, rc); + } + D_DEBUG(DB_TRACE, "%d execution streams successfully started " "(first core %d)\n", dss_tgt_nr, dss_core_offset); out: + ca_free(ca); return rc; } diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h index 278a77d5494..56255a4e3bd 100644 --- a/src/engine/srv_internal.h +++ b/src/engine/srv_internal.h @@ -77,7 +77,7 @@ struct dss_xstream { /* Cart progress timeout in micro-seconds */ unsigned int dx_timeout; bool dx_main_xs; /* true for main XS */ - bool dx_comm; /* true with cart context */ + bool dx_comm; /* true with primary cart context */ bool dx_dsc_started; /* DSC progress ULT started */ bool dx_progress_started; /* Network poll started */ }; @@ -117,6 +117,8 @@ extern int dss_num_cores_numa_node; extern unsigned int dss_tgt_offload_xs_nr; /** number of system XS */ extern unsigned int dss_sys_xs_nr; +/** Number of secondary cart context XS */ +extern unsigned int dss_sec_xs_nr; /** Flag of helper XS as a pool */ extern bool dss_helper_pool; @@ -270,7 +272,7 @@ void ds_iv_fini(void); 
/** Total number of XS */ #define DSS_XS_NR_TOTAL \ - (dss_sys_xs_nr + dss_tgt_nr + dss_tgt_offload_xs_nr) + (dss_sys_xs_nr + dss_tgt_nr + dss_tgt_offload_xs_nr + dss_sec_xs_nr) /** Total number of cart contexts created */ #define DSS_CTX_NR_TOTAL \ (DAOS_TGT0_OFFSET + dss_tgt_nr + \ @@ -283,31 +285,4 @@ void ds_iv_fini(void); dss_tgt_nr) + 1) + dss_sys_xs_nr)) -/** - * get the VOS target ID of xstream. - * - * \param[in] xs_id xstream ID - * - * \return VOS target ID (-1 for system XS). - */ -static inline int -dss_xs2tgt(int xs_id) -{ - D_ASSERTF(xs_id >= 0 && xs_id < DSS_XS_NR_TOTAL, - "invalid xs_id %d, dss_tgt_nr %d, " - "dss_tgt_offload_xs_nr %d.\n", - xs_id, dss_tgt_nr, dss_tgt_offload_xs_nr); - if (dss_helper_pool) { - if (xs_id < dss_sys_xs_nr || - xs_id >= dss_sys_xs_nr + dss_tgt_nr) - return -1; - return xs_id - dss_sys_xs_nr; - } - - if (xs_id < dss_sys_xs_nr) - return -1; - return (xs_id - dss_sys_xs_nr) / - (dss_tgt_offload_xs_nr / dss_tgt_nr + 1); -} - #endif /* __DAOS_SRV_INTERNAL__ */ diff --git a/src/include/daos/rpc.h b/src/include/daos/rpc.h index 3ffd1d0d13a..ba93c8e9458 100644 --- a/src/include/daos/rpc.h +++ b/src/include/daos/rpc.h @@ -93,7 +93,7 @@ enum daos_rpc_type { }; /** DAOS_TGT0_OFFSET is target 0's cart context offset */ -#define DAOS_TGT0_OFFSET (2) +#define DAOS_TGT0_OFFSET (2) /* 0 for SYS0, 1 for SWIM */ /** The cart context index of target index */ #define DAOS_IO_CTX_ID(tgt_idx) ((tgt_idx) + DAOS_TGT0_OFFSET) diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index 6c8d2ad0ec0..0bfc132491a 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -479,6 +479,9 @@ enum dss_xs_type { DSS_XS_SWIM = 4, /** drpc listener */ DSS_XS_DRPC = 5, + /** secondary cart context */ + DSS_XS_SEC = 6, + DSS_XS_MAX = 7, }; int dss_parameters_set(unsigned int key_id, uint64_t value); From c647c56306e9bbd7461358c6693f631a3367a8f9 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: 
Fri, 1 Apr 2022 13:55:16 -0600 Subject: [PATCH 04/28] DAOS-9623 control: Use comma separator for providers (#8578) Use a comma to separate the multiple provider list instead of a space. This matches up with what CART is expecting. Signed-off-by: Kris Jacque --- src/control/server/engine/config.go | 2 +- src/control/server/engine/config_test.go | 41 +++++++++++++----------- src/control/server/server_utils_test.go | 5 +-- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index 272e5337a1e..f419ded386d 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -24,7 +24,7 @@ const ( maxHelperStreamCount = 2 // MultiProviderSeparator delineates between providers in a multi-provider config. - MultiProviderSeparator = " " + MultiProviderSeparator = "," ) // FabricConfig encapsulates networking fabric configuration. diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index 98f9addc7bd..f0ff8193848 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -12,6 +12,7 @@ import ( "os" "path/filepath" "strconv" + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -526,6 +527,10 @@ func TestConfig_Validation(t *testing.T) { } } +func multiProviderString(comp ...string) string { + return strings.Join(comp, MultiProviderSeparator) +} + func TestConfig_FabricValidation(t *testing.T) { for name, tc := range map[string]struct { cfg FabricConfig @@ -569,31 +574,31 @@ func TestConfig_FabricValidation(t *testing.T) { }, "multi provider/interface/port ok": { cfg: FabricConfig{ - Provider: "foo bar", - Interface: "baz net", - InterfacePort: "42 128", + Provider: multiProviderString("foo", "bar"), + Interface: multiProviderString("baz", "net"), + InterfacePort: multiProviderString("42", "128"), }, }, "mismatched num providers": { cfg: FabricConfig{ Provider: "foo", - 
Interface: "bar baz", - InterfacePort: "42 128", + Interface: multiProviderString("baz", "net"), + InterfacePort: multiProviderString("42", "128"), }, expErr: errors.New("same number"), }, "mismatched num interfaces": { cfg: FabricConfig{ - Provider: "foo bar", + Provider: multiProviderString("foo", "bar"), Interface: "baz", - InterfacePort: "42 128", + InterfacePort: multiProviderString("42", "128"), }, expErr: errors.New("same number"), }, "mismatched num ports": { cfg: FabricConfig{ - Provider: "foo bar", - Interface: "baz net", + Provider: multiProviderString("foo", "bar"), + Interface: multiProviderString("baz", "net"), InterfacePort: "42", }, expErr: errors.New("same number"), @@ -825,13 +830,13 @@ func TestFabricConfig_GetProviders(t *testing.T) { }, "multi": { cfg: &FabricConfig{ - Provider: "p1 p2 p3", + Provider: multiProviderString("p1", "p2", "p3"), }, expProviders: []string{"p1", "p2", "p3"}, }, "excessive whitespace": { cfg: &FabricConfig{ - Provider: " p1 p2 p3", + Provider: multiProviderString(" ", " p1 ", " p2 ", "p3"), }, expProviders: []string{"p1", "p2", "p3"}, }, @@ -868,7 +873,7 @@ func TestFabricConfig_GetPrimaryProvider(t *testing.T) { }, "multi": { cfg: &FabricConfig{ - Provider: "p1 p2 p3", + Provider: multiProviderString("p1", "p2", "p3"), }, expProvider: "p1", }, @@ -903,13 +908,13 @@ func TestFabricConfig_GetInterfaces(t *testing.T) { }, "multi": { cfg: &FabricConfig{ - Interface: "net1 net2 net3", + Interface: multiProviderString("net1", "net2", "net3"), }, expInterfaces: []string{"net1", "net2", "net3"}, }, "excessive whitespace": { cfg: &FabricConfig{ - Interface: " net1 net2 net3 ", + Interface: multiProviderString(" net1 ", "", " net2", "net3", ""), }, expInterfaces: []string{"net1", "net2", "net3"}, }, @@ -946,7 +951,7 @@ func TestFabricConfig_GetPrimaryInterface(t *testing.T) { }, "multi": { cfg: &FabricConfig{ - Interface: "net0 net1 net3", + Interface: multiProviderString("net0", "net1", "net2", "net3"), }, expInterface: 
"net0", }, @@ -981,19 +986,19 @@ func TestFabricConfig_GetInterfacePorts(t *testing.T) { }, "multi": { cfg: &FabricConfig{ - InterfacePort: "1234 5678 9012", + InterfacePort: multiProviderString("1234", "5678", "9012"), }, expPorts: []int{1234, 5678, 9012}, }, "excessive whitespace": { cfg: &FabricConfig{ - InterfacePort: " 1234 5678 9012 ", + InterfacePort: multiProviderString("1234 ", " 5678 ", "", " 9012"), }, expPorts: []int{1234, 5678, 9012}, }, "non-integer port": { cfg: &FabricConfig{ - InterfacePort: "1234 a123", + InterfacePort: multiProviderString("1234", "a123"), }, expErr: errors.New("strconv.Atoi"), }, diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 847f6c25893..7a55969acad 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -10,6 +10,7 @@ import ( "fmt" "net" "os/user" + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -779,9 +780,9 @@ func TestServer_getNetDevClass(t *testing.T) { }, "multi interface": { configA: configA(). - WithFabricInterface("eth0 ib0"), + WithFabricInterface(strings.Join([]string{"eth0", "ib0"}, engine.MultiProviderSeparator)), configB: configB(). - WithFabricInterface("eth1 ib1"), + WithFabricInterface(strings.Join([]string{"eth1", "ib1"}, engine.MultiProviderSeparator)), expNetDevCls: []hardware.NetDevClass{hardware.Ether, hardware.Infiniband}, }, "mismatching net dev class with primary server as ib0 / Infiniband": { From 2cbaa4037cf9423dee7d104ae159e24cbed11b57 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Mon, 4 Apr 2022 09:03:44 -0600 Subject: [PATCH 05/28] DAOS-9623 control: Add secondary context count to config (#8516) The new parameter in the server config, secondary_provider_endpoints, defines the number of secondary CART contexts to be created by each engine for each secondary fabric provider. The primary provider is not affected. 
Signed-off-by: Kris Jacque --- src/control/server/config/server.go | 10 ++ src/control/server/config/server_test.go | 76 ++++++++++++ src/control/server/engine/config.go | 70 ++++++++--- src/control/server/engine/config_test.go | 144 +++++++++++++++++++++++ src/control/server/engine/tags.go | 26 +++- src/control/server/engine/tags_test.go | 35 +++--- src/control/server/storage/config.go | 2 +- src/engine/init.c | 6 +- utils/config/daos_server.yml | 11 ++ 9 files changed, 348 insertions(+), 32 deletions(-) diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index c5f87f02b29..ef3c9415992 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -135,6 +135,16 @@ func (cfg *Server) WithCrtTimeout(timeout uint32) *Server { return cfg } +// WithNumSecondaryEndpoints sets the number of network endpoints for each engine's secondary +// provider. +func (cfg *Server) WithNumSecondaryEndpoints(nr []int) *Server { + cfg.Fabric.NumSecondaryEndpoints = nr + for _, engine := range cfg.Engines { + engine.Fabric.Update(cfg.Fabric) + } + return cfg +} + // NB: In order to ease maintenance, the set of chained config functions // which modify nested engine configurations should be kept above this // one as a reference for which things should be set/updated in the next diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index bdd7fba344b..3b43d3d6789 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -211,6 +211,7 @@ func TestServerConfig_Constructed(t *testing.T) { WithFabricProvider("ofi+verbs"). WithCrtCtxShareAddr(0). WithCrtTimeout(30). + WithNumSecondaryEndpoints([]int{2}). WithAccessPoints("hostname1"). WithFaultCb("./.daos/fd_callback"). WithFaultPath("/vcdu0/rack1/hostname"). @@ -240,6 +241,7 @@ func TestServerConfig_Constructed(t *testing.T) { WithFabricProvider("ofi+verbs"). WithCrtCtxShareAddr(0). 
WithCrtTimeout(30). + WithNumSecondaryEndpoints([]int{2}). WithPinnedNumaNode(0). WithBypassHealthChk(&bypass). WithEnvVars("CRT_TIMEOUT=30"). @@ -269,6 +271,7 @@ func TestServerConfig_Constructed(t *testing.T) { WithFabricProvider("ofi+verbs"). WithCrtCtxShareAddr(0). WithCrtTimeout(30). + WithNumSecondaryEndpoints([]int{2}). WithPinnedNumaNode(1). WithBypassHealthChk(&bypass). WithEnvVars("CRT_TIMEOUT=100"). @@ -288,6 +291,79 @@ func TestServerConfig_Constructed(t *testing.T) { } } +func TestServerConfig_updateServerConfig(t *testing.T) { + for name, tc := range map[string]struct { + cfg *Server + nilEngCfg bool + expEngCfg *engine.Config + }{ + "nil engCfg": { + cfg: &Server{ + SystemName: "name", + }, + nilEngCfg: true, + expEngCfg: &engine.Config{}, + }, + "basic": { + cfg: &Server{ + SystemName: "name", + SocketDir: "socketdir", + Modules: "modules", + EnableHotplug: true, + Fabric: engine.FabricConfig{ + Provider: "provider", + Interface: "iface", + InterfacePort: "1111", + NumSecondaryEndpoints: []int{2, 3, 4}, + }, + }, + expEngCfg: &engine.Config{ + SystemName: "name", + SocketDir: "socketdir", + Modules: "modules", + Storage: storage.Config{ + EnableHotplug: true, + }, + Fabric: engine.FabricConfig{ + Provider: "provider", + Interface: "iface", + InterfacePort: "1111", + NumSecondaryEndpoints: []int{2, 3, 4}, + }, + }, + }, + "multiprovider": { + cfg: &Server{ + SystemName: "name", + Fabric: engine.FabricConfig{ + Provider: "p1 p2 p3", + NumSecondaryEndpoints: []int{2, 3, 4}, + }, + }, + expEngCfg: &engine.Config{ + SystemName: "name", + Fabric: engine.FabricConfig{ + Provider: "p1 p2 p3", + NumSecondaryEndpoints: []int{2, 3, 4}, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + var engCfg *engine.Config + if !tc.nilEngCfg { + engCfg = &engine.Config{} + } + + tc.cfg.updateServerConfig(&engCfg) + + if diff := cmp.Diff(tc.expEngCfg, engCfg); diff != "" { + t.Fatalf("(-want, +got): %s", diff) + } + }) + } +} + func TestServerConfig_Validation(t 
*testing.T) { testDir, cleanup := CreateTestDir(t) defer cleanup() diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index f419ded386d..7b9d95d2ded 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -21,7 +21,9 @@ import ( ) const ( - maxHelperStreamCount = 2 + maxHelperStreamCount = 2 + numPrimaryProviders = 1 + defaultNumSecondaryEndpoints = 1 // MultiProviderSeparator delineates between providers in a multi-provider config. MultiProviderSeparator = "," @@ -29,14 +31,15 @@ const ( // FabricConfig encapsulates networking fabric configuration. type FabricConfig struct { - Provider string `yaml:"provider,omitempty" cmdEnv:"CRT_PHY_ADDR_STR"` - Interface string `yaml:"fabric_iface,omitempty" cmdEnv:"OFI_INTERFACE"` - InterfacePort string `yaml:"fabric_iface_port,omitempty" cmdEnv:"OFI_PORT"` - NumaNodeIndex uint `yaml:"-"` - BypassHealthChk *bool `yaml:"bypass_health_chk,omitempty" cmdLongFlag:"--bypass_health_chk" cmdShortFlag:"-b"` - CrtCtxShareAddr uint32 `yaml:"crt_ctx_share_addr,omitempty" cmdEnv:"CRT_CTX_SHARE_ADDR"` - CrtTimeout uint32 `yaml:"crt_timeout,omitempty" cmdEnv:"CRT_TIMEOUT"` - DisableSRX bool `yaml:"disable_srx,omitempty" cmdEnv:"FI_OFI_RXM_USE_SRX,invertBool,intBool"` + Provider string `yaml:"provider,omitempty" cmdEnv:"CRT_PHY_ADDR_STR"` + Interface string `yaml:"fabric_iface,omitempty" cmdEnv:"OFI_INTERFACE"` + InterfacePort string `yaml:"fabric_iface_port,omitempty" cmdEnv:"OFI_PORT"` + NumaNodeIndex uint `yaml:"-"` + BypassHealthChk *bool `yaml:"bypass_health_chk,omitempty" cmdLongFlag:"--bypass_health_chk" cmdShortFlag:"-b"` + CrtCtxShareAddr uint32 `yaml:"crt_ctx_share_addr,omitempty" cmdEnv:"CRT_CTX_SHARE_ADDR"` + CrtTimeout uint32 `yaml:"crt_timeout,omitempty" cmdEnv:"CRT_TIMEOUT"` + NumSecondaryEndpoints []int `yaml:"secondary_provider_endpoints,omitempty" cmdLongFlag:"--nr_sec_ctx,nonzero" cmdShortFlag:"-S,nonzero"` + DisableSRX bool 
`yaml:"disable_srx,omitempty" cmdEnv:"FI_OFI_RXM_USE_SRX,invertBool,intBool"` } // GetPrimaryProvider parses the primary provider from the Provider string. @@ -76,6 +79,15 @@ func splitMultiProviderStr(str string) []string { return result } +// GetNumProviders gets the number of fabric providers configured. +func (fc *FabricConfig) GetNumProviders() int { + providers, err := fc.GetProviders() + if err != nil { + return 0 + } + return len(providers) +} + // GetPrimaryInterface parses the primary fabric interface from the Interface string. func (fc *FabricConfig) GetPrimaryInterface() (string, error) { interfaces, err := fc.GetInterfaces() @@ -139,14 +151,25 @@ func (fc *FabricConfig) Update(other FabricConfig) { if fc.CrtTimeout == 0 { fc.CrtTimeout = other.CrtTimeout } + if len(fc.NumSecondaryEndpoints) == 0 { + fc.setNumSecondaryEndpoints(other.NumSecondaryEndpoints) + } +} + +func (fc *FabricConfig) setNumSecondaryEndpoints(other []int) { + if len(other) == 0 { + // Set defaults + numSecProv := fc.GetNumProviders() - numPrimaryProviders + for i := 0; i < numSecProv; i++ { + other = append(other, defaultNumSecondaryEndpoints) + } + } + fc.NumSecondaryEndpoints = other } // Validate ensures that the configuration meets minimum standards. 
func (fc *FabricConfig) Validate() error { - prov, err := fc.GetProviders() - if err != nil { - return err - } + numProv := fc.GetNumProviders() interfaces, err := fc.GetInterfaces() if err != nil { @@ -164,10 +187,23 @@ func (fc *FabricConfig) Validate() error { } } - if len(prov) != len(interfaces) || len(prov) != len(ports) { + if len(interfaces) != numProv || len(ports) != numProv { return errors.Errorf("provider, fabric_iface and fabric_iface_port must include the same number of items delimited by %q", MultiProviderSeparator) } + numSecProv := numProv - numPrimaryProviders + if numSecProv > 0 { + if len(fc.NumSecondaryEndpoints) != 0 && len(fc.NumSecondaryEndpoints) != numSecProv { + return errors.New("secondary_provider_endpoints must have one value for each secondary provider") + } + + for _, nrCtx := range fc.NumSecondaryEndpoints { + if nrCtx < 1 { + return errors.Errorf("all values in secondary_provider_endpoints must be > 0") + } + } + } + return nil } @@ -531,6 +567,12 @@ func (c *Config) WithCrtTimeout(timeout uint32) *Config { return c } +// WithNumSecondaryEndpoints sets the number of network endpoints for each secondary provider. +func (c *Config) WithNumSecondaryEndpoints(nr []int) *Config { + c.Fabric.NumSecondaryEndpoints = nr + return c +} + // WithTargetCount sets the number of VOS targets to run on this instance. 
func (c *Config) WithTargetCount(count int) *Config { c.TargetCount = count diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index f0ff8193848..5bf228e83ce 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -603,6 +603,41 @@ func TestConfig_FabricValidation(t *testing.T) { }, expErr: errors.New("same number"), }, + "nr secondary ctxs less than 1": { + cfg: FabricConfig{ + Provider: multiProviderString("foo", "bar"), + Interface: multiProviderString("baz", "net"), + InterfacePort: multiProviderString("42", "128"), + NumSecondaryEndpoints: []int{0}, + }, + expErr: errors.New("must be > 0"), + }, + "nr secondary ctxs okay": { + cfg: FabricConfig{ + Provider: multiProviderString("foo", "bar", "baz"), + Interface: multiProviderString("net0", "net1", "net2"), + InterfacePort: multiProviderString("42", "128", "256"), + NumSecondaryEndpoints: []int{1, 2}, + }, + }, + "too many nr secondary ctxs": { + cfg: FabricConfig{ + Provider: multiProviderString("foo", "bar", "baz"), + Interface: multiProviderString("net0", "net1", "net2"), + InterfacePort: multiProviderString("42", "128", "256"), + NumSecondaryEndpoints: []int{1, 2, 3}, + }, + expErr: errors.New("must have one value for each"), + }, + "too few nr secondary ctxs": { + cfg: FabricConfig{ + Provider: multiProviderString("foo", "bar", "baz"), + Interface: multiProviderString("net0", "net1", "net2"), + InterfacePort: multiProviderString("42", "128", "256"), + NumSecondaryEndpoints: []int{1}, + }, + expErr: errors.New("must have one value for each"), + }, } { t.Run(name, func(t *testing.T) { gotErr := tc.cfg.Validate() @@ -852,6 +887,34 @@ func TestFabricConfig_GetProviders(t *testing.T) { } } +func TestFabricConfig_GetNumProviders(t *testing.T) { + for name, tc := range map[string]struct { + cfg *FabricConfig + expNum int + }{ + "nil": {}, + "empty": { + cfg: &FabricConfig{}, + }, + "single": { + cfg: &FabricConfig{ + 
Provider: "p1", + }, + expNum: 1, + }, + "multi": { + cfg: &FabricConfig{ + Provider: multiProviderString("p1", "p2", "p3", "p4"), + }, + expNum: 4, + }, + } { + t.Run(name, func(t *testing.T) { + common.AssertEqual(t, tc.expNum, tc.cfg.GetNumProviders(), "") + }) + } +} + func TestFabricConfig_GetPrimaryProvider(t *testing.T) { for name, tc := range map[string]struct { cfg *FabricConfig @@ -1013,3 +1076,84 @@ func TestFabricConfig_GetInterfacePorts(t *testing.T) { }) } } + +func TestFabricConfig_Update(t *testing.T) { + for name, tc := range map[string]struct { + fc *FabricConfig + other FabricConfig + expResult *FabricConfig + }{ + "set all": { + fc: &FabricConfig{}, + other: FabricConfig{ + Provider: "p", + Interface: "i", + InterfacePort: "1234", + CrtCtxShareAddr: 2, + CrtTimeout: 3, + NumSecondaryEndpoints: []int{1}, + }, + expResult: &FabricConfig{ + Provider: "p", + Interface: "i", + InterfacePort: "1234", + CrtCtxShareAddr: 2, + CrtTimeout: 3, + NumSecondaryEndpoints: []int{1}, + }, + }, + "already set": { + fc: &FabricConfig{ + Provider: "p", + Interface: "i", + InterfacePort: "1234", + CrtCtxShareAddr: 2, + CrtTimeout: 3, + NumSecondaryEndpoints: []int{1}, + }, + other: FabricConfig{ + Provider: "q", + Interface: "h", + InterfacePort: "5678", + CrtCtxShareAddr: 3, + CrtTimeout: 4, + NumSecondaryEndpoints: []int{5}, + }, + expResult: &FabricConfig{ + Provider: "p", + Interface: "i", + InterfacePort: "1234", + CrtCtxShareAddr: 2, + CrtTimeout: 3, + NumSecondaryEndpoints: []int{1}, + }, + }, + "default secondary ctx": { + fc: &FabricConfig{}, + other: FabricConfig{ + Provider: multiProviderString("one", "two", "three"), + }, + expResult: &FabricConfig{ + Provider: multiProviderString("one", "two", "three"), + NumSecondaryEndpoints: []int{1, 1}, + }, + }, + "no secondary ctx": { + fc: &FabricConfig{}, + other: FabricConfig{ + Provider: "one", + }, + expResult: &FabricConfig{ + Provider: "one", + }, + }, + } { + t.Run(name, func(t *testing.T) { + 
tc.fc.Update(tc.other) + + if diff := cmp.Diff(tc.expResult, tc.fc); diff != "" { + t.Fatalf("(-want, +got):\n%s", diff) + } + }) + } +} diff --git a/src/control/server/engine/tags.go b/src/control/server/engine/tags.go index a2016d8d88a..ed3e1122f99 100644 --- a/src/control/server/engine/tags.go +++ b/src/control/server/engine/tags.go @@ -23,6 +23,7 @@ const ( nonZero = "nonzero" // only set if non-zero value invertBool = "invertBool" // invert the value before setting intBool = "intBool" // convert the bool to an int + sliceCount = "count" // use the length of a slice rather than its contents ) type ( @@ -155,7 +156,14 @@ func parseCmdTags(in interface{}, tagFilter string, joiner joinFn, seenRefs refM if fVal.Len() == 0 && opts.hasOpt(nonZero) { continue } - strVal := strconv.Itoa(fVal.Len()) + + var strVal string + if opts.hasOpt(sliceCount) { + strVal = fmt.Sprintf("%d", fVal.Len()) + } else { + strVal = getSliceStr(fVal) + } + out = append(out, joiner(tag, strVal)...) case reflect.Uintptr, reflect.Ptr: if fVal.IsNil() { @@ -200,3 +208,19 @@ func parseCmdTags(in interface{}, tagFilter string, joiner joinFn, seenRefs refM return } + +func getSliceStr(val reflect.Value) string { + iVal := val.Interface() + + strSlice := make([]string, 0) + switch slice := iVal.(type) { + case []int: + for _, n := range slice { + strSlice = append(strSlice, fmt.Sprintf("%d", n)) + } + default: + return strconv.Itoa(val.Len()) + } + + return strings.Join(strSlice, ",") +} diff --git a/src/control/server/engine/tags_test.go b/src/control/server/engine/tags_test.go index a5e0f1854d9..71b789adf6d 100644 --- a/src/control/server/engine/tags_test.go +++ b/src/control/server/engine/tags_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2021 Intel Corporation. +// (C) Copyright 2019-2022 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -31,8 +31,9 @@ type testConfig struct { BoolIntEnv bool `cmdEnv:"BOOL_INT_ENV,intBool"` IntPtrOpt *int `cmdShortFlag:"-p" cmdLongFlag:"--int_ptr"` UnsetIntPtrOpt *int `cmdShortFlag:"-r" cmdLongFlag:"--unset_int_ptr"` - SliceOpt []int `cmdShortFlag:"-S,nonzero" cmdLongFlag:"--slice_length,nonzero"` - SliceOptEmpty []int `cmdShortFlag:"-E,nonzero" cmdLongFlag:"--slice_length_empty,nonzero"` + SliceCountOpt []int `cmdShortFlag:"-C,nonzero,count" cmdLongFlag:"--slice_count,nonzero,count"` + SliceOpt []int `cmdShortFlag:"-S,nonzero" cmdLongFlag:"--slice,nonzero"` + SliceOptEmpty []int `cmdShortFlag:"-E,nonzero" cmdLongFlag:"--slice_empty,nonzero"` Nested subConfig NestedPointer *subConfig NilNestedPointer *subConfig @@ -44,15 +45,16 @@ func intRef(in int) *int { } var testStruct = &testConfig{ - IntOpt: -1, - UintOpt: 1, - StringOpt: "stringOpt", - SetBoolOpt: true, - IntEnv: -1, - StringEnv: "stringEnv", - SetBoolEnv: true, - IntPtrOpt: intRef(4), - SliceOpt: []int{0, 1, 2}, + IntOpt: -1, + UintOpt: 1, + StringOpt: "stringOpt", + SetBoolOpt: true, + IntEnv: -1, + StringEnv: "stringEnv", + SetBoolEnv: true, + IntPtrOpt: intRef(4), + SliceCountOpt: []int{0, 1, 2}, + SliceOpt: []int{0, 1, 2, 3}, Nested: subConfig{ NestedIntOpt: 2, }, @@ -73,7 +75,8 @@ func TestParseLongFlags(t *testing.T) { "--string=stringOpt", "--set_bool", "--int_ptr=4", - "--slice_length=3", + "--slice_count=3", + "--slice=0,1,2,3", "--nested_int=2", "--nested_int=3", } @@ -94,7 +97,8 @@ func TestParseShortFlags(t *testing.T) { "-s", "stringOpt", "-b", "-p", "4", - "-S", "3", + "-C", "3", + "-S", "0,1,2,3", "-n", "2", "-n", "3", } @@ -135,7 +139,8 @@ func TestCircularRef(t *testing.T) { "-s", "stringOpt", "-b", "-p", "4", - "-S", "3", + "-C", "3", + "-S", "0,1,2,3", "-n", "2", "-n", "3", } diff --git a/src/control/server/storage/config.go b/src/control/server/storage/config.go index 783227c121b..586f559e07e 100644 --- 
a/src/control/server/storage/config.go +++ b/src/control/server/storage/config.go @@ -569,7 +569,7 @@ func parsePCIBusRange(numRange string, bitSize int) (uint8, uint8, error) { } type Config struct { - Tiers TierConfigs `yaml:"storage" cmdLongFlag:"--storage_tiers,nonzero" cmdShortFlag:"-T,nonzero"` + Tiers TierConfigs `yaml:"storage" cmdLongFlag:"--storage_tiers,nonzero,count" cmdShortFlag:"-T,nonzero,count"` ConfigOutputPath string `yaml:"-" cmdLongFlag:"--nvme" cmdShortFlag:"-n"` VosEnv string `yaml:"-" cmdEnv:"VOS_BDEV_CLASS"` EnableHotplug bool `yaml:"-"` diff --git a/src/engine/init.c b/src/engine/init.c index be12ff4e40a..b60114adda9 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -912,6 +912,7 @@ parse(int argc, char **argv) { "instance_idx", required_argument, NULL, 'I' }, { "bypass_health_chk", no_argument, NULL, 'b' }, { "storage_tiers", required_argument, NULL, 'T' }, + { "nr_sec_ctx", required_argument, NULL, 'S' }, { NULL, 0, NULL, 0} }; int rc = 0; @@ -919,7 +920,7 @@ parse(int argc, char **argv) /* load all of modules by default */ sprintf(modules, "%s", MODULE_LIST); - while ((c = getopt_long(argc, argv, "c:d:f:g:hi:m:n:p:r:H:t:s:x:I:bT:", + while ((c = getopt_long(argc, argv, "c:d:f:g:hi:m:n:p:r:H:t:s:x:I:bT:S:", opts, NULL)) != -1) { switch (c) { case 'm': @@ -988,6 +989,9 @@ parse(int argc, char **argv) rc = -DER_INVAL; } break; + case 'S': + rc = arg_strtoul(optarg, &dss_sec_xs_nr, "\"-S\""); + break; default: usage(argv[0], stderr); rc = -DER_INVAL; diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 6186be67ae0..03816314aab 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -193,6 +193,17 @@ ##nr_hugepages: -1 # # +## Number of network endpoints per secondary provider +# +## Specifies the number of secondary network endpoints each engine will create +## for each additional fabric provider after the first. This only applies when +## running in multi-provider mode. 
+# +## default: 1 +# +#secondary_provider_endpoints: [2] +# +# ## Force specific debug mask for daos_server (control plane). ## By default, just use the default debug mask used by daos_server. ## Mask specifies minimum level of message significance to pass to logger. From 89b906e61db9a833fb2ed9ef359c0b5b6ef203aa Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Mon, 4 Apr 2022 09:06:01 -0600 Subject: [PATCH 06/28] DAOS-9623 control: Check multiprovider interfaces (#8580) - Check all the fabric interfaces in the server. Test-tag: pr daily Signed-off-by: Kris Jacque --- src/control/server/server.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/control/server/server.go b/src/control/server/server.go index 9a0e36ee883..0b13578ef9c 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -56,10 +56,17 @@ func processConfig(log *logging.LeveledLogger, cfg *config.Server, fis *hardware return iface, nil } for _, ec := range cfg.Engines { - if err := checkFabricInterface(ec.Fabric.Interface, lookupNetIF); err != nil { + fabricIFs, err := ec.Fabric.GetInterfaces() + if err != nil { return nil, err } + for _, iface := range fabricIFs { + if err := checkFabricInterface(iface, lookupNetIF); err != nil { + return nil, err + } + } + if err := updateFabricEnvars(log, ec, fis); err != nil { return nil, errors.Wrap(err, "update engine fabric envars") } From 4e774791d106aac6a9b6ab7bdc9acfa9ae207d08 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Wed, 6 Apr 2022 11:22:20 -0600 Subject: [PATCH 07/28] DAOS-9623 agent: Let client specify desired fabric iface (#8619) * DAOS-9623 agent: Let client specify desired fabric iface If the client has an OFI_INTERFACE/OFI_DOMAIN specified, the agent should select the GetAttachInfo payload for the provider that matches their choice. - Include OFI_INTERFACE/OFI_DOMAIN in client GetAttachInfo request. - If the requested interface/domain doesn't exist in the system, the primary provider is used. 
- The client requested interface and domain will be used even if not detected or they don't seem to support the configured provider. The mismatch will be logged as an error to make errors with the env variables easier to debug. Features: control Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/fabric.go | 124 ++++- src/control/cmd/daos_agent/fabric_test.go | 541 +++++++++++++++++-- src/control/cmd/daos_agent/infocache.go | 4 +- src/control/cmd/daos_agent/infocache_test.go | 41 +- src/control/cmd/daos_agent/mgmt_rpc.go | 140 +++-- src/control/cmd/daos_agent/mgmt_rpc_test.go | 181 +++++-- src/control/common/proto/mgmt/svc.pb.go | 149 ++--- src/mgmt/cli_mgmt.c | 44 +- src/mgmt/svc.pb-c.c | 32 +- src/mgmt/svc.pb-c.h | 10 +- src/proto/mgmt/svc.proto | 2 + 11 files changed, 1000 insertions(+), 268 deletions(-) diff --git a/src/control/cmd/daos_agent/fabric.go b/src/control/cmd/daos_agent/fabric.go index 93ac7b722de..10d3ed317b6 100644 --- a/src/control/cmd/daos_agent/fabric.go +++ b/src/control/cmd/daos_agent/fabric.go @@ -14,6 +14,7 @@ import ( "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/lib/hardware" "github.com/daos-stack/daos/src/control/logging" ) @@ -32,10 +33,16 @@ type FabricInterface struct { hw *hardware.FabricInterface } +// Providers returns a slice of the providers associated with the interface. func (f *FabricInterface) Providers() []string { return f.hw.Providers.ToSlice() } +// ProviderSet returns a StringSet of the providers associated with the interface. +func (f *FabricInterface) ProviderSet() common.StringSet { + return f.hw.Providers +} + func (f *FabricInterface) String() string { var dom string if f.Domain != "" { @@ -120,54 +127,51 @@ func (n *NUMAFabric) getNumNUMANodes() int { return len(n.numaMap) } +// FabricIfaceParams is a set of parameters associated with a fabric interface. 
+type FabricIfaceParams struct { + Interface string + Domain string + Provider string + DevClass hardware.NetDevClass + NUMANode int +} + // GetDevice selects the next available interface device on the requested NUMA node. -func (n *NUMAFabric) GetDevice(numaNode int, netDevClass hardware.NetDevClass, provider string) (*FabricInterface, error) { +func (n *NUMAFabric) GetDevice(params *FabricIfaceParams) (*FabricInterface, error) { if n == nil { return nil, errors.New("nil NUMAFabric") } - if provider == "" { + if params == nil { + return nil, errors.New("nil FabricIfaceParams") + } + + if params.Provider == "" { return nil, errors.New("provider is required") } n.mutex.Lock() defer n.mutex.Unlock() - fi, err := n.getDeviceFromNUMA(numaNode, netDevClass, provider) + fi, err := n.getDeviceFromNUMA(params.NUMANode, params.DevClass, params.Provider) if err == nil { - return copyFI(fi, provider), nil + return copyFI(fi), nil } - fi, err = n.findOnAnyNUMA(netDevClass, provider) + fi, err = n.findOnAnyNUMA(params.DevClass, params.Provider) if err != nil { return nil, err } - return copyFI(fi, provider), nil + return copyFI(fi), nil } -func copyFI(fi *FabricInterface, provider string) *FabricInterface { +func copyFI(fi *FabricInterface) *FabricInterface { fiCopy := new(FabricInterface) *fiCopy = *fi return fiCopy } -// Find finds a specific fabric device by name. 
-func (n *NUMAFabric) Find(name string) (*FabricInterface, error) { - if n == nil { - return nil, errors.New("nil NUMAFabric") - } - - for _, devs := range n.numaMap { - for _, fi := range devs { - if fi.Name == name { - return fi, nil - } - } - } - return nil, fmt.Errorf("fabric interface %q not found", name) -} - func (n *NUMAFabric) getDeviceFromNUMA(numaNode int, netDevClass hardware.NetDevClass, provider string) (*FabricInterface, error) { for checked := 0; checked < n.getNumDevices(numaNode); checked++ { fabricIF := n.getNextDevice(numaNode) @@ -273,6 +277,80 @@ func (n *NUMAFabric) setDefaultNUMANode() { } } +// Find finds a specific fabric device by name. There may be more than one domain associated. +func (n *NUMAFabric) Find(name string) ([]*FabricInterface, error) { + if n == nil { + return nil, errors.New("nil NUMAFabric") + } + + result := make([]*FabricInterface, 0) + for _, devs := range n.numaMap { + for _, fi := range devs { + if fi.Name == name { + result = append(result, copyFI(fi)) + } + } + } + + if len(result) > 0 { + return result, nil + } + + return nil, fmt.Errorf("fabric interface %q not found", name) +} + +// FindDevice looks up a fabric device with a given name, domain, and provider. +// NB: The domain and provider are optional. All other parameters are required. If there is more +// than one match, all of them are returned. 
+func (n *NUMAFabric) FindDevice(params *FabricIfaceParams) ([]*FabricInterface, error) { + if params == nil { + return nil, errors.New("nil FabricIfaceParams") + } + + fiList, err := n.Find(params.Interface) + if err != nil { + return nil, err + } + + if params.Domain != "" { + fiList = filterDomain(params.Domain, fiList) + if len(fiList) == 0 { + return nil, errors.Errorf("fabric interface %q doesn't have requested domain %q", + params.Interface, params.Domain) + } + } + + if params.Provider != "" { + fiList = filterProvider(params.Provider, fiList) + if len(fiList) == 0 { + return nil, errors.Errorf("fabric interface %q doesn't support provider %q", + params.Interface, params.Provider) + } + } + + return fiList, nil +} + +func filterDomain(domain string, fiList []*FabricInterface) []*FabricInterface { + result := make([]*FabricInterface, 0, len(fiList)) + for _, fi := range fiList { + if fi.Domain == domain || (fi.Name == domain && fi.Domain == "") { + result = append(result, fi) + } + } + return result +} + +func filterProvider(provider string, fiList []*FabricInterface) []*FabricInterface { + result := make([]*FabricInterface, 0, len(fiList)) + for _, fi := range fiList { + if fi.HasProvider(provider) { + result = append(result, fi) + } + } + return result +} + func newNUMAFabric(log logging.Logger) *NUMAFabric { return &NUMAFabric{ log: log, diff --git a/src/control/cmd/daos_agent/fabric_test.go b/src/control/cmd/daos_agent/fabric_test.go index aff4a8fdb58..e7227b518f5 100644 --- a/src/control/cmd/daos_agent/fabric_test.go +++ b/src/control/cmd/daos_agent/fabric_test.go @@ -172,21 +172,25 @@ func TestAgent_NUMAFabric_Add(t *testing.T) { func TestAgent_NUMAFabric_GetDevice(t *testing.T) { for name, tc := range map[string]struct { - nf *NUMAFabric - node int - provider string - netDevClass hardware.NetDevClass - expErr error - expResults []*FabricInterface + nf *NUMAFabric + params *FabricIfaceParams + expErr error + expResults []*FabricInterface }{ "nil": { 
expErr: errors.New("nil NUMAFabric"), }, + "nil params": { + nf: newNUMAFabric(nil), + expErr: errors.New("nil FabricIfaceParams"), + }, "empty": { - provider: "ofi+sockets", - nf: newNUMAFabric(nil), - netDevClass: hardware.Loopback, - expErr: errors.New("no suitable fabric interface"), + nf: newNUMAFabric(nil), + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Loopback, + }, + expErr: errors.New("no suitable fabric interface"), }, "no provider": { nf: &NUMAFabric{ @@ -213,12 +217,12 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 0, - netDevClass: hardware.Ether, - expErr: errors.New("provider is required"), + params: &FabricIfaceParams{ + DevClass: hardware.Ether, + }, + expErr: errors.New("provider is required"), }, "type not found": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -243,12 +247,13 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 0, - netDevClass: hardware.Infiniband, - expErr: errors.New("no suitable fabric interface"), + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Infiniband, + }, + expErr: errors.New("no suitable fabric interface"), }, "provider not found": { - provider: "ofi+verbs", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -273,12 +278,13 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 0, - netDevClass: hardware.Infiniband, - expErr: errors.New("no suitable fabric interface"), + params: &FabricIfaceParams{ + Provider: "ofi+verbs", + DevClass: hardware.Infiniband, + }, + expErr: errors.New("no suitable fabric interface"), }, "choose first device": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -291,8 +297,10 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 0, - netDevClass: hardware.Infiniband, + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Infiniband, + 
}, expResults: []*FabricInterface{ { Name: "t1", @@ -305,7 +313,6 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, "choose later device": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -324,8 +331,11 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 0, - netDevClass: hardware.Infiniband, + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Infiniband, + NUMANode: 0, + }, expResults: []*FabricInterface{ { Name: "t2", @@ -342,7 +352,6 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, "nothing on NUMA node": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -356,8 +365,11 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { 1: {}, }, }, - node: 1, - netDevClass: hardware.Infiniband, + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Infiniband, + NUMANode: 1, + }, expResults: []*FabricInterface{ { Name: "t1", @@ -366,7 +378,6 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, "type not found on NUMA node": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -387,8 +398,11 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 1, - netDevClass: hardware.Infiniband, + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Infiniband, + NUMANode: 1, + }, expResults: []*FabricInterface{ { Name: "t1", @@ -401,7 +415,6 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, "manual FI matches any": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -418,8 +431,11 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 1, - netDevClass: hardware.Infiniband, + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Infiniband, + NUMANode: 1, + }, expResults: []*FabricInterface{ { Name: "t2", @@ -432,7 +448,6 @@ func 
TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, "load balancing": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -460,8 +475,11 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { 0: 1, }, }, - node: 0, - netDevClass: hardware.Ether, + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Ether, + NUMANode: 0, + }, expResults: []*FabricInterface{ { Name: "t2", @@ -482,7 +500,6 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, "validating IPs fails": { - provider: "ofi+sockets", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -498,12 +515,14 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { return nil, errors.New("mock getAddrInterface") }, }, - node: 0, - netDevClass: hardware.Infiniband, - expErr: FabricNotFoundErr(hardware.Infiniband), + params: &FabricIfaceParams{ + Provider: "ofi+sockets", + DevClass: hardware.Infiniband, + NUMANode: 0, + }, + expErr: FabricNotFoundErr(hardware.Infiniband), }, "specific provider": { - provider: "ofi+verbs", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -530,8 +549,11 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 0, - netDevClass: hardware.Ether, + params: &FabricIfaceParams{ + Provider: "ofi+verbs", + DevClass: hardware.Ether, + NUMANode: 0, + }, expResults: []*FabricInterface{ { Name: "t2", @@ -551,7 +573,6 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, "specific provider from other numa": { - provider: "ofi+verbs", nf: &NUMAFabric{ numaMap: map[int][]*FabricInterface{ 0: { @@ -572,8 +593,11 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { }, }, }, - node: 0, - netDevClass: hardware.Ether, + params: &FabricIfaceParams{ + Provider: "ofi+verbs", + DevClass: hardware.Ether, + NUMANode: 0, + }, expResults: []*FabricInterface{ { Name: "t2", @@ -598,9 +622,14 @@ func TestAgent_NUMAFabric_GetDevice(t *testing.T) { } } + numaNode := 0 + if tc.params != nil { + numaNode = 
tc.params.NUMANode + } + var results []*FabricInterface - for i := 0; i < tc.nf.NumDevices(tc.node)+1; i++ { - result, err := tc.nf.GetDevice(tc.node, tc.netDevClass, tc.provider) + for i := 0; i < tc.nf.NumDevices(numaNode)+1; i++ { + result, err := tc.nf.GetDevice(tc.params) common.CmpErr(t, tc.expErr, err) if tc.expErr != nil { return @@ -619,7 +648,7 @@ func TestAgent_NUMAFabric_Find(t *testing.T) { for name, tc := range map[string]struct { nf *NUMAFabric name string - expResult *FabricInterface + expResult []*FabricInterface expErr error }{ "nil": { @@ -668,9 +697,44 @@ func TestAgent_NUMAFabric_Find(t *testing.T) { }, }, name: "t2", - expResult: &FabricInterface{ - Name: "t2", - NetDevClass: hardware.Ether, + expResult: []*FabricInterface{ + { + Name: "t2", + NetDevClass: hardware.Ether, + }, + }, + }, + "multiple": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + NetDevClass: hardware.Ether, + }, + { + Name: "t2", + NetDevClass: hardware.Infiniband, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + }, + }, + }, + }, + name: "t2", + expResult: []*FabricInterface{ + { + Name: "t2", + NetDevClass: hardware.Infiniband, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + }, }, }, } { @@ -685,6 +749,373 @@ func TestAgent_NUMAFabric_Find(t *testing.T) { } } +func TestAgent_NUMAFabric_FindDevice(t *testing.T) { + for name, tc := range map[string]struct { + nf *NUMAFabric + params *FabricIfaceParams + expResult []*FabricInterface + expErr error + }{ + "nil": { + params: &FabricIfaceParams{ + Interface: "eth0", + }, + expErr: errors.New("nil"), + }, + "nil params": { + nf: newNUMAFabric(nil), + expErr: errors.New("nil"), + }, + "name not found": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + NetDevClass: hardware.Ether, + }, + { + Name: "t2", + NetDevClass: hardware.Ether, + }, + { + Name: "t3", + NetDevClass: hardware.Ether, + }, + }, + }, 
+ }, + params: &FabricIfaceParams{ + Interface: "t4", + }, + expErr: errors.New("not found"), + }, + "no domain match": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + Domain: "d1", + Provider: "p1", + }, + expErr: errors.New("doesn't have requested domain"), + }, + "no provider match": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + Domain: "d2", + Provider: "p2", + }, + expErr: errors.New("doesn't support provider"), + }, + "success": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: 
&hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + Domain: "d2", + Provider: "p2", + }, + expResult: []*FabricInterface{ + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + "success with no domain": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + Provider: "p2", + }, + expResult: []*FabricInterface{ + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + "domain is name": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + Domain: "t2", + Provider: "p1", + }, + expResult: []*FabricInterface{ + { + Name: "t2", + Domain: "", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: 
common.NewStringSet("p1"), + }, + }, + }, + }, + "success with no provider": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + Domain: "d2", + }, + expResult: []*FabricInterface{ + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + "more than one match": { + nf: &NUMAFabric{ + numaMap: map[int][]*FabricInterface{ + 0: { + { + Name: "t1", + Domain: "t1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + }, + params: &FabricIfaceParams{ + Interface: "t2", + }, + expResult: []*FabricInterface{ + { + Name: "t2", + Domain: "t2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p1"), + }, + }, + { + Name: "t2", + Domain: "d2", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("p2"), + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + result, err := tc.nf.FindDevice(tc.params) + + common.CmpErr(t, tc.expErr, err) + if diff := 
cmp.Diff(tc.expResult, result, fiCmpOpt); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + }) + } +} + func TestAgent_NUMAFabricFromScan(t *testing.T) { for name, tc := range map[string]struct { input *hardware.FabricInterfaceSet diff --git a/src/control/cmd/daos_agent/infocache.go b/src/control/cmd/daos_agent/infocache.go index aabcd313984..dd71a6bf7bf 100644 --- a/src/control/cmd/daos_agent/infocache.go +++ b/src/control/cmd/daos_agent/infocache.go @@ -164,7 +164,7 @@ func (c *localFabricCache) setCache(nf *NUMAFabric) { } // GetDevices fetches an appropriate fabric device from the cache. -func (c *localFabricCache) GetDevice(numaNode int, netDevClass hardware.NetDevClass, provider string) (*FabricInterface, error) { +func (c *localFabricCache) GetDevice(params *FabricIfaceParams) (*FabricInterface, error) { if c == nil { return nil, NotCachedErr } @@ -175,5 +175,5 @@ func (c *localFabricCache) GetDevice(numaNode int, netDevClass hardware.NetDevCl if !c.IsCached() { return nil, NotCachedErr } - return c.localNUMAFabric.GetDevice(numaNode, netDevClass, provider) + return c.localNUMAFabric.GetDevice(params) } diff --git a/src/control/cmd/daos_agent/infocache_test.go b/src/control/cmd/daos_agent/infocache_test.go index e78785871a9..9b51acd7fbe 100644 --- a/src/control/cmd/daos_agent/infocache_test.go +++ b/src/control/cmd/daos_agent/infocache_test.go @@ -452,25 +452,30 @@ func TestAgent_localFabricCache_GetDevice(t *testing.T) { } for name, tc := range map[string]struct { - lfc *localFabricCache - numaNode int - netDevClass hardware.NetDevClass - provider string - expDevice *FabricInterface - expErr error + lfc *localFabricCache + params *FabricIfaceParams + expDevice *FabricInterface + expErr error }{ "nil cache": { expErr: NotCachedErr, }, + "nil params": { + lfc: newTestFabricCache(t, nil, populatedCache), + expErr: errors.New("nil"), + }, "nothing cached": { lfc: &localFabricCache{}, + params: &FabricIfaceParams{}, expErr: NotCachedErr, }, "request 
verbs": { - lfc: newTestFabricCache(t, nil, populatedCache), - numaNode: 2, - provider: "ofi+verbs", - netDevClass: hardware.Ether, + lfc: newTestFabricCache(t, nil, populatedCache), + params: &FabricIfaceParams{ + NUMANode: 2, + Provider: "ofi+verbs", + DevClass: hardware.Ether, + }, expDevice: &FabricInterface{ Name: "test7", NetDevClass: hardware.Ether, @@ -478,10 +483,12 @@ func TestAgent_localFabricCache_GetDevice(t *testing.T) { }, }, "request sockets": { - lfc: newTestFabricCache(t, nil, populatedCache), - numaNode: 0, - provider: "ofi+sockets", - netDevClass: hardware.Ether, + lfc: newTestFabricCache(t, nil, populatedCache), + params: &FabricIfaceParams{ + NUMANode: 0, + Provider: "ofi+sockets", + DevClass: hardware.Ether, + }, expDevice: &FabricInterface{ Name: "test2", NetDevClass: hardware.Ether, @@ -500,11 +507,7 @@ func TestAgent_localFabricCache_GetDevice(t *testing.T) { } } - if tc.provider == "" { - tc.provider = "ofi+tcp" - } - - dev, err := tc.lfc.GetDevice(tc.numaNode, tc.netDevClass, tc.provider) + dev, err := tc.lfc.GetDevice(tc.params) common.CmpErr(t, tc.expErr, err) if diff := cmp.Diff(tc.expDevice, dev, cmpopts.IgnoreUnexported(FabricInterface{})); diff != "" { diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index 2d93985958d..939b64076a3 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -14,6 +14,7 @@ import ( "golang.org/x/sys/unix" "google.golang.org/protobuf/proto" + "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/common/proto/convert" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/drpc" @@ -116,7 +117,7 @@ func (mod *mgmtModule) handleGetAttachInfo(ctx context.Context, reqb []byte, pid mod.log.Debugf("client process NUMA node %d", numaNode) - resp, err := mod.getAttachInfo(ctx, int(numaNode), pbReq.Sys) + resp, err := mod.getAttachInfo(ctx, 
int(numaNode), pbReq) if err != nil { return nil, err } @@ -143,30 +144,45 @@ func (mod *mgmtModule) getNUMANode(ctx context.Context, pid int32) (uint, error) return numaNode, nil } -func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) { - rawResp, err := mod.getAttachInfoResp(ctx, numaNode, sys) +func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, req *mgmtpb.GetAttachInfoReq) (*mgmtpb.GetAttachInfoResp, error) { + rawResp, err := mod.getAttachInfoResp(ctx, numaNode, req.Sys) if err != nil { mod.log.Errorf("failed to fetch remote AttachInfo: %s", err.Error()) return nil, err } - resp, err := mod.getProviderAttachInfo(rawResp) + reqProviders := mod.getInterfaceProviders(req.Interface, req.Domain) + + resp, err := mod.selectAttachInfo(rawResp, reqProviders) if err != nil { return nil, err } - fabricIF, err := mod.getFabricInterface(ctx, numaNode, hardware.NetDevClass(resp.ClientNetHint.NetDevClass), - resp.ClientNetHint.Provider) - if err != nil { - mod.log.Errorf("failed to fetch fabric interface of type %s: %s", - hardware.NetDevClass(resp.ClientNetHint.NetDevClass), err.Error()) - return nil, err + // Requested fabric interface/domain behave as a simple override. If we weren't able to + // validate them, we return them to the user with the understanding that perhaps the user + // knows what they're doing. 
+ iface := req.Interface + domain := req.Domain + if req.Interface == "" { + fabricIF, err := mod.getFabricInterface(ctx, &FabricIfaceParams{ + NUMANode: numaNode, + DevClass: hardware.NetDevClass(resp.ClientNetHint.NetDevClass), + Provider: resp.ClientNetHint.Provider, + }) + if err != nil { + mod.log.Errorf("failed to fetch fabric interface of type %s: %s", + hardware.NetDevClass(resp.ClientNetHint.NetDevClass), err.Error()) + return nil, err + } + + iface = fabricIF.Name + domain = fabricIF.Domain } - resp.ClientNetHint.Interface = fabricIF.Name - resp.ClientNetHint.Domain = fabricIF.Name - if fabricIF.Domain != "" { - resp.ClientNetHint.Domain = fabricIF.Domain + resp.ClientNetHint.Interface = iface + resp.ClientNetHint.Domain = iface + if domain != "" { + resp.ClientNetHint.Domain = domain mod.log.Debugf("OFI_DOMAIN for %s has been detected as: %s", resp.ClientNetHint.Interface, resp.ClientNetHint.Domain) } @@ -178,35 +194,80 @@ func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, numaNode int, sys return mod.attachInfo.Get(ctx, numaNode, sys, mod.getAttachInfoRemote) } -func (mod *mgmtModule) getProviderAttachInfo(srvResp *mgmtpb.GetAttachInfoResp) (*mgmtpb.GetAttachInfoResp, error) { - if mod.provider == "" || mod.provider == srvResp.ClientNetHint.Provider { +func (mod *mgmtModule) getInterfaceProviders(iface, domain string) common.StringSet { + if iface == "" { + return nil + } + + if domain == "" { + domain = iface + } + + fis, err := mod.fabricInfo.localNUMAFabric.FindDevice(&FabricIfaceParams{ + Interface: iface, + Domain: domain, + }) + if err != nil { + mod.log.Errorf("client-requested fabric interface/domain not detected: %s", err.Error()) + mod.log.Error("communications on this interface may fail") + return nil + } + + providers := common.NewStringSet() + for _, fi := range fis { + providers.AddUnique(fi.Providers()...) 
+ } + return providers +} + +func (mod *mgmtModule) selectAttachInfo(srvResp *mgmtpb.GetAttachInfoResp, reqProviders common.StringSet) (*mgmtpb.GetAttachInfoResp, error) { + providers := reqProviders + if mod.provider != "" { + if len(reqProviders) > 0 && !reqProviders.Has(mod.provider) { + mod.log.Errorf("configured provider %q not included in requested interface's detected providers: %s", reqProviders) + mod.log.Error("communications on this interface may fail") + } + providers = common.NewStringSet(mod.provider) + } + + if len(providers) == 0 { return srvResp, nil } + if providers.Has(srvResp.ClientNetHint.Provider) { + return srvResp, nil + } + + for _, hint := range srvResp.SecondaryClientNetHints { + if providers.Has(hint.Provider) { + uris, err := mod.getProviderURIs(srvResp, hint.Provider) + if err == nil { + return &mgmtpb.GetAttachInfoResp{ + Status: srvResp.Status, + RankUris: uris, + MsRanks: srvResp.MsRanks, + ClientNetHint: hint, + }, nil + } + } + } + + return nil, errors.Errorf("no valid connection information for providers: %s", providers) +} + +func (mod *mgmtModule) getProviderURIs(srvResp *mgmtpb.GetAttachInfoResp, provider string) ([]*mgmtpb.GetAttachInfoResp_RankUri, error) { uris := []*mgmtpb.GetAttachInfoResp_RankUri{} for _, uri := range srvResp.SecondaryRankUris { - if uri.Provider == mod.provider { + if uri.Provider == provider { uris = append(uris, uri) } } if len(uris) == 0 { - return nil, errors.Errorf("no rank URIs for provider %q", mod.provider) + return nil, errors.Errorf("no rank URIs for provider %q", provider) } - for _, hint := range srvResp.SecondaryClientNetHints { - if hint.Provider == mod.provider { - - return &mgmtpb.GetAttachInfoResp{ - Status: srvResp.Status, - RankUris: uris, - MsRanks: srvResp.MsRanks, - ClientNetHint: hint, - }, nil - } - } - - return nil, errors.Errorf("no ClientNetHint for provider %q", mod.provider) + return uris, nil } func (mod *mgmtModule) getAttachInfoRemote(ctx context.Context, numaNode int, 
sys string) (*mgmtpb.GetAttachInfoResp, error) { @@ -232,9 +293,9 @@ func (mod *mgmtModule) getAttachInfoRemote(ctx context.Context, numaNode int, sy return pbResp, nil } -func (mod *mgmtModule) getFabricInterface(ctx context.Context, numaNode int, netDevClass hardware.NetDevClass, provider string) (*FabricInterface, error) { +func (mod *mgmtModule) getFabricInterface(ctx context.Context, params *FabricIfaceParams) (*FabricInterface, error) { if mod.fabricInfo.IsCached() { - return mod.fabricInfo.GetDevice(numaNode, netDevClass, provider) + return mod.getCachedInterface(ctx, params) } scanner := hwprov.DefaultFabricScanner(mod.log) @@ -246,7 +307,18 @@ func (mod *mgmtModule) getFabricInterface(ctx context.Context, numaNode int, net mod.fabricInfo.CacheScan(ctx, result) - return mod.fabricInfo.GetDevice(numaNode, netDevClass, provider) + return mod.getCachedInterface(ctx, params) +} + +func (mod *mgmtModule) getCachedInterface(ctx context.Context, params *FabricIfaceParams) (*FabricInterface, error) { + if params.Interface != "" { + fi, err := mod.fabricInfo.localNUMAFabric.FindDevice(params) + if err != nil { + return nil, err + } + return fi[0], nil + } + return mod.fabricInfo.GetDevice(params) } func (mod *mgmtModule) handleNotifyPoolConnect(ctx context.Context, reqb []byte, pid int32) error { diff --git a/src/control/cmd/daos_agent/mgmt_rpc_test.go b/src/control/cmd/daos_agent/mgmt_rpc_test.go index cd6167024df..e74bbb1b805 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc_test.go +++ b/src/control/cmd/daos_agent/mgmt_rpc_test.go @@ -92,13 +92,13 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { SecondaryClientNetHints: []*mgmtpb.ClientNetHint{ { Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Ether), + NetDevClass: uint32(hardware.Infiniband), }, }, } } - hintResp := func(fi, domain string) *mgmtpb.GetAttachInfoResp { + priResp := func(fi, domain string) *mgmtpb.GetAttachInfoResp { withHint := testSrvResp() withHint.ClientNetHint.Interface = fi 
withHint.ClientNetHint.Domain = domain @@ -106,12 +106,43 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { return withHint } + secResp := func(fi, domain string) *mgmtpb.GetAttachInfoResp { + return &mgmtpb.GetAttachInfoResp{ + RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ + { + Rank: 0, + Uri: "uri0-sec", + Provider: "ofi+tcp", + }, + { + Rank: 1, + Uri: "uri1-sec", + Provider: "ofi+tcp", + }, + { + Rank: 3, + Uri: "uri3-sec", + Provider: "ofi+tcp", + }, + }, + MsRanks: []uint32{0, 1, 3}, + ClientNetHint: &mgmtpb.ClientNetHint{ + Provider: "ofi+tcp", + NetDevClass: uint32(hardware.Infiniband), + Interface: fi, + Domain: domain, + }, + } + } + for name, tc := range map[string]struct { - provider string - numaNode int - rpcResp *control.HostResponse - expResp *mgmtpb.GetAttachInfoResp - expErr error + reqIface string + reqDomain string + provider string + numaNode int + rpcResp *control.HostResponse + expResp *mgmtpb.GetAttachInfoResp + expErr error }{ "RPC error": { rpcResp: &control.HostResponse{ @@ -161,60 +192,102 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { rpcResp: &control.HostResponse{ Message: testSrvResp(), }, - expResp: hintResp("fi0", "d0"), + expResp: priResp("fi0", "d0"), }, "primary provider": { provider: "ofi+verbs", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, - expResp: hintResp("fi0", "d0"), + expResp: priResp("fi0", "d0"), }, "secondary provider": { provider: "ofi+tcp", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, - expResp: &mgmtpb.GetAttachInfoResp{ - RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ - { - Rank: 0, - Uri: "uri0-sec", - Provider: "ofi+tcp", - }, - { - Rank: 1, - Uri: "uri1-sec", - Provider: "ofi+tcp", - }, - { - Rank: 3, - Uri: "uri3-sec", - Provider: "ofi+tcp", - }, - }, - MsRanks: []uint32{0, 1, 3}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Ether), - Interface: "fi1", - Domain: "fi1", - }, + expResp: secResp("fi0", "fi0"), + 
}, + "client req iface and domain": { + reqIface: "fi1", + reqDomain: "d1", + provider: "ofi+verbs", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: priResp("fi1", "d1"), + }, + "client req secondary provider": { + reqIface: "fi1", + reqDomain: "fi1", + provider: "ofi+tcp", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), }, + expResp: secResp("fi1", "fi1"), + }, + "client req iface for secondary provider": { + reqIface: "fi1", + reqDomain: "fi1", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: secResp("fi1", "fi1"), + }, + "client req iface only": { + reqIface: "fi1", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: secResp("fi1", "fi1"), + }, + "client req domain-only ignored": { + reqDomain: "d2", + provider: "ofi+verbs", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: priResp("fi0", "d0"), + }, + "client req provider mismatch ignored": { + reqIface: "fi1", + reqDomain: "d1", + provider: "ofi+tcp", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: secResp("fi1", "d1"), + }, + "client req iface/domain mismatch ignored": { + reqIface: "fi0", + reqDomain: "d2", + provider: "ofi+verbs", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: priResp("fi0", "d2"), + }, + "client req iface not found ignored": { + reqIface: "notreal", + provider: "ofi+verbs", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expResp: priResp("notreal", "notreal"), }, "config provider not found": { provider: "notreal", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, - expErr: errors.New("no rank URIs for provider"), + expErr: errors.New("no valid connection information"), }, "config provider hint missing": { provider: "ofi+sockets", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, - expErr: errors.New("no ClientNetHint for provider"), + expErr: errors.New("no valid connection 
information"), }, } { t.Run(name, func(t *testing.T) { @@ -233,9 +306,26 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { Providers: common.NewStringSet("ofi+verbs"), }, }, + { + Name: "fi0", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("ofi+tcp"), + }, + }, + }, + 1: { + { + Name: "fi1", + Domain: "d1", + NetDevClass: hardware.Infiniband, + hw: &hardware.FabricInterface{ + Providers: common.NewStringSet("ofi+verbs"), + }, + }, { Name: "fi1", - NetDevClass: hardware.Ether, + NetDevClass: hardware.Infiniband, hw: &hardware.FabricInterface{ Providers: common.NewStringSet("ofi+tcp"), }, @@ -259,7 +349,12 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { provider: tc.provider, } - resp, err := mod.getAttachInfo(context.Background(), tc.numaNode, sysName) + resp, err := mod.getAttachInfo(context.Background(), tc.numaNode, + &mgmtpb.GetAttachInfoReq{ + Sys: sysName, + Interface: tc.reqIface, + Domain: tc.reqDomain, + }) common.CmpErr(t, tc.expErr, err) if diff := cmp.Diff(tc.expResp, resp, cmpopts.IgnoreUnexported( @@ -383,7 +478,10 @@ func TestAgent_mgmtModule_getAttachInfo_cacheResp(t *testing.T) { } for _, expResp := range tc.expResps { - resp, err := mod.getAttachInfo(context.Background(), 0, sysName) + resp, err := mod.getAttachInfo(context.Background(), 0, + &mgmtpb.GetAttachInfoReq{ + Sys: sysName, + }) common.CmpErr(t, nil, err) @@ -445,7 +543,10 @@ func TestAgent_mgmtModule_getAttachInfo_Parallel(t *testing.T) { go func(n int) { defer wg.Done() - _, err := mod.getAttachInfo(context.Background(), 0, sysName) + _, err := mod.getAttachInfo(context.Background(), 0, + &mgmtpb.GetAttachInfoReq{ + Sys: sysName, + }) if err != nil { panic(errors.Wrapf(err, "thread %d", n)) } diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index a87c5043fae..3098d03b5e1 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go 
@@ -527,8 +527,10 @@ type GetAttachInfoReq struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // System name. For daos_agent only. - AllRanks bool `protobuf:"varint,2,opt,name=all_ranks,json=allRanks,proto3" json:"all_ranks,omitempty"` // Return Rank URIs for all ranks. + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // System name. For daos_agent only. + AllRanks bool `protobuf:"varint,2,opt,name=all_ranks,json=allRanks,proto3" json:"all_ranks,omitempty"` // Return Rank URIs for all ranks. + Interface string `protobuf:"bytes,3,opt,name=interface,proto3" json:"interface,omitempty"` // Preferred fabric interface. + Domain string `protobuf:"bytes,4,opt,name=domain,proto3" json:"domain,omitempty"` // Preferred fabric domain. } func (x *GetAttachInfoReq) Reset() { @@ -577,6 +579,20 @@ func (x *GetAttachInfoReq) GetAllRanks() bool { return false } +func (x *GetAttachInfoReq) GetInterface() string { + if x != nil { + return x.Interface + } + return "" +} + +func (x *GetAttachInfoReq) GetDomain() string { + if x != nil { + return x.Domain + } + return "" +} + type ClientNetHint struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1146,73 +1162,76 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, + 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x77, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 
0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x22, 0xf3, 0x01, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, - 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, - 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, - 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, - 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, - 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, - 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, - 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, - 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, - 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, - 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, - 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x22, 0xb1, 0x03, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, - 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 
0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, - 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, - 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, - 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, - 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, - 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, - 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, - 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, - 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, - 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, - 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, - 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, - 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x11, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, - 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, - 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, - 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, - 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x1a, 0x4b, 0x0a, - 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 
0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, - 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x1a, - 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, - 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, - 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, - 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, - 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x20, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, - 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, - 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, - 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, - 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, - 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, - 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, - 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, - 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, - 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 
0x64, 0x61, - 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, - 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, - 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, + 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, + 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, 0xf3, 0x01, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, + 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, + 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, + 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, + 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, 0x72, + 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, 0x72, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, 0x68, + 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, 0x74, + 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, 0x72, + 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, 0x5f, + 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, 0x0b, + 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 
0x07, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x22, 0xb1, 0x03, 0x0a, + 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, + 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, + 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, + 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, + 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, + 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, + 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, + 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, + 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x11, + 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, + 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x63, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x73, 0x18, + 0x06, 0x20, 
0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, + 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, 0x65, 0x63, 0x6f, + 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, + 0x6e, 0x74, 0x73, 0x1a, 0x4b, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, + 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, + 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x75, 0x72, 0x69, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, + 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, + 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, 0x67, 0x52, + 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x20, 0x0a, 0x0a, 0x53, 0x65, + 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x7c, 0x0a, 0x0e, + 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, + 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, + 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, + 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 
0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, + 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, + 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c index 543bbf31b3d..49ec03a2db6 100644 --- a/src/mgmt/cli_mgmt.c +++ b/src/mgmt/cli_mgmt.c @@ -228,6 +228,8 @@ get_attach_info(const char *name, bool all_ranks, struct dc_mgmt_sys_info *info, size_t reqb_size; Drpc__Call *dreq; Drpc__Response *dresp; + char *ofi_interface; + char *ofi_domain; int rc; D_DEBUG(DB_MGMT, "getting attach info for %s\n", name); @@ -243,9 +245,18 @@ get_attach_info(const char *name, bool all_ranks, struct dc_mgmt_sys_info *info, D_GOTO(out, rc); } + ofi_interface = getenv("OFI_INTERFACE"); + if (ofi_interface) + D_INFO("Using client provided OFI_INTERFACE: %s\n", ofi_interface); + ofi_domain = getenv("OFI_DOMAIN"); + if (ofi_domain) + D_INFO("Using client provided OFI_DOMAIN: %s\n", ofi_domain); + /* Prepare the GetAttachInfo request. 
*/ req.sys = (char *)name; req.all_ranks = all_ranks; + req.interface = ofi_interface; + req.domain = ofi_domain; reqb_size = mgmt__get_attach_info_req__get_packed_size(&req); D_ALLOC(reqb, reqb_size); if (reqb == NULL) { @@ -328,8 +339,6 @@ int dc_mgmt_net_cfg(const char *name) int rc; char buf[SYS_INFO_BUF_SIZE]; char *crt_timeout; - char *ofi_interface; - char *ofi_domain; char *cli_srx_set; struct dc_mgmt_sys_info info; Mgmt__GetAttachInfoResp *resp; @@ -379,31 +388,14 @@ int dc_mgmt_net_cfg(const char *name) crt_timeout); } - ofi_interface = getenv("OFI_INTERFACE"); - ofi_domain = getenv("OFI_DOMAIN"); - if (!ofi_interface) { - rc = setenv("OFI_INTERFACE", info.interface, 1); - if (rc != 0) - D_GOTO(cleanup, rc = d_errno2der(errno)); - - /* - * If we use the agent as the source, client env shouldn't be allowed to override - * the domain. Otherwise we could get a mismatch between interface and domain. - */ - if (ofi_domain) - D_WARN("Ignoring OFI_DOMAIN '%s' because OFI_INTERFACE is not set; using " - "automatic configuration instead\n", ofi_domain); - - rc = setenv("OFI_DOMAIN", info.domain, 1); - if (rc != 0) - D_GOTO(cleanup, rc = d_errno2der(errno)); - } else { - D_INFO("Using client provided OFI_INTERFACE: %s\n", ofi_interface); + /* client-provided iface/domain were already taken into account by agent */ + rc = setenv("OFI_INTERFACE", info.interface, 1); + if (rc != 0) + D_GOTO(cleanup, rc = d_errno2der(errno)); - /* If the client env didn't provide a domain, we can assume we don't need one. 
*/ - if (ofi_domain) - D_INFO("Using client provided OFI_DOMAIN: %s\n", ofi_domain); - } + rc = setenv("OFI_DOMAIN", info.domain, 1); + if (rc != 0) + D_GOTO(cleanup, rc = d_errno2der(errno)); D_DEBUG(DB_MGMT, "CaRT initialization with:\n" diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index a2fc1458ce2..c9f943153e3 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -1189,7 +1189,7 @@ const ProtobufCMessageDescriptor mgmt__leader_query_resp__descriptor = (ProtobufCMessageInit) mgmt__leader_query_resp__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__get_attach_info_req__field_descriptors[2] = +static const ProtobufCFieldDescriptor mgmt__get_attach_info_req__field_descriptors[4] = { { "sys", @@ -1215,15 +1215,41 @@ static const ProtobufCFieldDescriptor mgmt__get_attach_info_req__field_descripto 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "interface", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoReq, interface), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "domain", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoReq, domain), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__get_attach_info_req__field_indices_by_name[] = { 1, /* field[1] = all_ranks */ + 3, /* field[3] = domain */ + 2, /* field[2] = interface */ 0, /* field[0] = sys */ }; static const ProtobufCIntRange mgmt__get_attach_info_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 2 } + { 0, 4 } }; const ProtobufCMessageDescriptor mgmt__get_attach_info_req__descriptor = { @@ -1233,7 +1259,7 @@ const ProtobufCMessageDescriptor mgmt__get_attach_info_req__descriptor = "Mgmt__GetAttachInfoReq", "mgmt", sizeof(Mgmt__GetAttachInfoReq), - 2, + 4, 
mgmt__get_attach_info_req__field_descriptors, mgmt__get_attach_info_req__field_indices_by_name, 1, mgmt__get_attach_info_req__number_ranges, diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 2ad92eb134c..5c384cb0616 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -216,10 +216,18 @@ struct _Mgmt__GetAttachInfoReq * Return Rank URIs for all ranks. */ protobuf_c_boolean all_ranks; + /* + * Preferred fabric interface. + */ + char *interface; + /* + * Preferred fabric domain. + */ + char *domain; }; #define MGMT__GET_ATTACH_INFO_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__get_attach_info_req__descriptor) \ - , (char *)protobuf_c_empty_string, 0 } + , (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } struct _Mgmt__ClientNetHint diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index cc35f5e2305..38c137dc1b7 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -67,6 +67,8 @@ message LeaderQueryResp { message GetAttachInfoReq { string sys = 1; // System name. For daos_agent only. bool all_ranks = 2; // Return Rank URIs for all ranks. + string interface = 3; // Preferred fabric interface. + string domain = 4; // Preferred fabric domain. } message ClientNetHint { From b98906a318567451b39d24635a0c111c40e10aad Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Mon, 6 Jun 2022 07:06:46 -0700 Subject: [PATCH 08/28] CART-89 cart: Cart multiprov changes (#8952) Cart level changes for multiprovider support, phase1. No bulk support yet. 
Signed-off-by: Alexander A Oganezov --- src/cart/crt_context.c | 97 +++- src/cart/crt_ctl.c | 2 +- src/cart/crt_group.c | 29 +- src/cart/crt_hg.c | 70 ++- src/cart/crt_hg.h | 43 +- src/cart/crt_init.c | 578 +++++++++++++------- src/cart/crt_internal_fns.h | 4 +- src/cart/crt_internal_types.h | 17 +- src/cart/crt_swim.c | 4 +- src/include/cart/api.h | 34 ++ src/include/cart/types.h | 29 + src/tests/ftest/cart/SConscript | 1 + src/tests/ftest/cart/dual_provider_client.c | 303 ++++++++++ src/tests/ftest/cart/dual_provider_common.h | 188 +++++++ src/tests/ftest/cart/dual_provider_server.c | 393 +++++++++++++ 15 files changed, 1473 insertions(+), 319 deletions(-) create mode 100644 src/tests/ftest/cart/dual_provider_client.c create mode 100644 src/tests/ftest/cart/dual_provider_common.h create mode 100644 src/tests/ftest/cart/dual_provider_server.c diff --git a/src/cart/crt_context.c b/src/cart/crt_context.c index c779cb107ee..8a1052dac9e 100644 --- a/src/cart/crt_context.c +++ b/src/cart/crt_context.c @@ -173,7 +173,42 @@ crt_context_init(crt_context_t crt_ctx) } int -crt_context_provider_create(crt_context_t *crt_ctx, int provider) +crt_context_provider_create(crt_context_t *crt_ctx, crt_provider_t provider, bool primary); + +int +crt_context_create_on_provider(crt_context_t *crt_ctx, const char *provider, bool primary) +{ + int provider_idx = -1; + + provider_idx = crt_str_to_provider(provider); + if (provider_idx == -1) { + D_ERROR("Invalid requested provider '%s'\n", provider); + return -DER_INVAL; + } + + return crt_context_provider_create(crt_ctx, provider_idx, primary); +} + +int +crt_context_uri_get(crt_context_t crt_ctx, char **uri) +{ + struct crt_context *ctx = NULL; + + if (crt_ctx == NULL || uri == NULL) { + D_ERROR("Invalid null parameters\n"); + return -DER_INVAL; + } + + ctx = crt_ctx; + D_STRNDUP(*uri, ctx->cc_self_uri, CRT_ADDR_STR_MAX_LEN); + if (*uri == NULL) + return DER_NOMEM; + + return DER_SUCCESS; +} + +int 
+crt_context_provider_create(crt_context_t *crt_ctx, crt_provider_t provider, bool primary) { struct crt_context *ctx = NULL; int rc = 0; @@ -210,6 +245,7 @@ crt_context_provider_create(crt_context_t *crt_ctx, int provider) D_GOTO(out, rc); } + ctx->cc_primary = primary; D_RWLOCK_WRLOCK(&crt_gdata.cg_rwlock); rc = crt_hg_ctx_init(&ctx->cc_hg_ctx, provider, cur_ctx_num); @@ -221,15 +257,13 @@ crt_context_provider_create(crt_context_t *crt_ctx, int provider) D_GOTO(out, rc); } - if (crt_is_service()) { - rc = crt_hg_get_addr(ctx->cc_hg_ctx.chc_hgcla, - ctx->cc_self_uri, &uri_len); - if (rc != 0) { - D_ERROR("ctx_hg_get_addr() failed; rc: %d.\n", rc); - D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); - crt_context_destroy(ctx, true); - D_GOTO(out, rc); - } + rc = crt_hg_get_addr(ctx->cc_hg_ctx.chc_hgcla, + ctx->cc_self_uri, &uri_len); + if (rc != 0) { + D_ERROR("ctx_hg_get_addr() failed; rc: %d.\n", rc); + D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); + crt_context_destroy(ctx, true); + D_GOTO(out, rc); } ctx->cc_idx = cur_ctx_num; @@ -284,7 +318,7 @@ crt_context_provider_create(crt_context_t *crt_ctx, int provider) D_GOTO(out, rc); } - if (provider == CRT_NA_OFI_SOCKETS || provider == CRT_NA_OFI_TCP_RXM) { + if (provider == CRT_PROV_OFI_SOCKETS || provider == CRT_PROV_OFI_TCP_RXM) { struct crt_grp_priv *grp_priv = crt_gdata.cg_grp->gg_primary_grp; struct crt_swim_membs *csm = &grp_priv->gp_membs_swim; @@ -306,10 +340,40 @@ crt_context_provider_create(crt_context_t *crt_ctx, int provider) return rc; } +bool +crt_context_is_primary(crt_context_t crt_ctx) +{ + struct crt_context *ctx; + + ctx = crt_ctx; + + return ctx->cc_primary; +} + int crt_context_create(crt_context_t *crt_ctx) { - return crt_context_provider_create(crt_ctx, crt_gdata.cg_init_prov); + return crt_context_provider_create(crt_ctx, crt_gdata.cg_primary_prov, true); +} + +int +crt_context_create_secondary(crt_context_t *crt_ctx, int idx) +{ + crt_provider_t sec_prov; + + if (crt_gdata.cg_secondary_provs == NULL) { + 
D_ERROR("Secondary provider not initialized\n"); + return -DER_INVAL; + } + + /* TODO: Use idx later to ref other providers */ + sec_prov = crt_gdata.cg_secondary_provs[0]; + if (sec_prov == CRT_PROV_UNKNOWN) { + D_ERROR("Unknown secondary provider\n"); + return -DER_INVAL; + } + + return crt_context_provider_create(crt_ctx, sec_prov, false); } int @@ -649,7 +713,8 @@ crt_rank_abort(d_rank_t rank) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_init_prov); + /* TODO: Do we need to handle secondary providers? */ + ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { rc = 0; D_MUTEX_LOCK(&ctx->cc_mutex); @@ -1193,7 +1258,7 @@ crt_context_lookup_locked(int ctx_idx) struct crt_context *ctx; d_list_t *ctx_list; - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_init_prov); + ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { if (ctx->cc_idx == ctx_idx) @@ -1213,7 +1278,7 @@ crt_context_lookup(int ctx_idx) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_init_prov); + ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { if (ctx->cc_idx == ctx_idx) { @@ -1279,7 +1344,7 @@ crt_context_num(int *ctx_num) return -DER_INVAL; } - *ctx_num = crt_gdata.cg_prov_gdata[crt_gdata.cg_init_prov].cpg_ctx_num; + *ctx_num = crt_gdata.cg_prov_gdata[crt_gdata.cg_primary_prov].cpg_ctx_num; return 0; } diff --git a/src/cart/crt_ctl.c b/src/cart/crt_ctl.c index 3357706048c..fa6c0273558 100644 --- a/src/cart/crt_ctl.c +++ b/src/cart/crt_ctl.c @@ -202,7 +202,7 @@ crt_hdlr_ctl_ls(crt_rpc_t *rpc_req) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); /* TODO: Need to derive provider from rpc struct */ - provider = crt_gdata.cg_init_prov; + provider = crt_gdata.cg_primary_prov; ctx_list = crt_provider_get_ctx_list(provider); diff --git 
a/src/cart/crt_group.c b/src/cart/crt_group.c index a45552e6145..197fa83947f 100644 --- a/src/cart/crt_group.c +++ b/src/cart/crt_group.c @@ -383,7 +383,7 @@ grp_li_uri_set(struct crt_lookup_item *li, int tag, const char *uri) d_rank_t rank; int rc = 0; int i; - enum crt_na_type prov_type; + crt_provider_t provider; rank = li->li_rank; grp_priv = li->li_grp_priv; @@ -400,20 +400,20 @@ grp_li_uri_set(struct crt_lookup_item *li, int tag, const char *uri) ui->ui_initialized = 1; ui->ui_rank = li->li_rank; - rc = crt_hg_parse_uri(uri, &prov_type, base_addr); + rc = crt_hg_parse_uri(uri, &provider, base_addr); if (rc) D_GOTO(exit, rc); D_DEBUG(DB_NET, "Parsed uri '%s', base_addr='%s' prov=%d\n", - uri, base_addr, prov_type); + uri, base_addr, provider); - if (crt_provider_is_contig_ep(prov_type)) { - if (crt_provider_is_port_based(prov_type)) { - rc = generate_port_based_uris(prov_type, base_addr, tag, ui); - } else if (prov_type == CRT_NA_OFI_CXI) { - rc = generate_cxi_uris(prov_type, base_addr, tag, ui); + if (crt_provider_is_contig_ep(provider)) { + if (crt_provider_is_port_based(provider)) { + rc = generate_port_based_uris(provider, base_addr, tag, ui); + } else if (provider == CRT_PROV_OFI_CXI) { + rc = generate_cxi_uris(provider, base_addr, tag, ui); } else { - D_ERROR("Unknown provider %d for uri='%s'\n", prov_type, uri); + D_ERROR("Unknown provider %d for uri='%s'\n", provider, uri); rc = -DER_INVAL; } @@ -434,7 +434,7 @@ grp_li_uri_set(struct crt_lookup_item *li, int tag, const char *uri) if (rc != 0) { D_ERROR("Entry already present\n"); - if (crt_provider_is_contig_ep(prov_type)) { + if (crt_provider_is_contig_ep(provider)) { for (i = 0; i < CRT_SRV_CONTEXT_NUM; i++) D_FREE(ui->ui_uri[i]); } else { @@ -926,6 +926,7 @@ crt_grp_lc_lookup(struct crt_grp_priv *grp_priv, int ctx_idx, struct crt_lookup_item *li; d_list_t *rlink; struct crt_grp_priv *default_grp_priv; + crt_provider_t provider; D_ASSERT(grp_priv != NULL); @@ -933,8 +934,10 @@ 
crt_grp_lc_lookup(struct crt_grp_priv *grp_priv, int ctx_idx, D_ASSERT(uri != NULL || hg_addr != NULL); D_ASSERT(ctx_idx >= 0 && ctx_idx < CRT_SRV_CONTEXT_NUM); + provider = crt_gdata.cg_primary_prov; + /* TODO: Derive from context */ - if (crt_provider_is_sep(crt_gdata.cg_init_prov)) + if (crt_provider_is_sep(provider)) tag = 0; default_grp_priv = grp_priv; @@ -1902,7 +1905,7 @@ crt_group_config_save(crt_group_t *grp, bool forall) rank = grp_priv->gp_self; /* TODO: Per provider address needs to be stored in future */ - addr = crt_gdata.cg_prov_gdata[crt_gdata.cg_init_prov].cpg_addr; + addr = crt_gdata.cg_prov_gdata[crt_gdata.cg_primary_prov].cpg_addr; grpid = grp_priv->gp_pub.cg_grpid; filename = crt_grp_attach_info_filename(grp_priv); @@ -2528,7 +2531,7 @@ crt_rank_self_set(d_rank_t rank) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_init_prov); + ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { hg_class = ctx->cc_hg_ctx.chc_hgcla; diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index aa6e64123c0..c45893dfd84 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -12,95 +12,95 @@ #include "mercury_util.h" /* - * na_dict table should be in the same order of enum crt_na_type, the last one + * na_dict table should be in the same order of enum crt_provider_t, the last one * is terminator with NULL nad_str. 
*/ struct crt_na_dict crt_na_dict[] = { { - .nad_type = CRT_NA_SM, + .nad_type = CRT_PROV_SM, .nad_str = "sm", .nad_contig_eps = false, .nad_port_bind = false, }, { - .nad_type = CRT_NA_OFI_SOCKETS, + .nad_type = CRT_PROV_OFI_SOCKETS, .nad_str = "ofi+sockets", .nad_alt_str = "ofi+socket", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_OFI_VERBS_RXM, + .nad_type = CRT_PROV_OFI_VERBS_RXM, .nad_str = "ofi+verbs;ofi_rxm", .nad_alt_str = "ofi+verbs", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_OFI_GNI, + .nad_type = CRT_PROV_OFI_GNI, .nad_str = "ofi+gni", .nad_contig_eps = true, .nad_port_bind = false, }, { - .nad_type = CRT_NA_OFI_PSM2, + .nad_type = CRT_PROV_OFI_PSM2, .nad_str = "ofi+psm2", .nad_contig_eps = false, .nad_port_bind = false, }, { - .nad_type = CRT_NA_OFI_TCP_RXM, + .nad_type = CRT_PROV_OFI_TCP_RXM, .nad_str = "ofi+tcp;ofi_rxm", .nad_alt_str = "ofi+tcp", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_OFI_CXI, + .nad_type = CRT_PROV_OFI_CXI, .nad_str = "ofi+cxi", .nad_contig_eps = true, .nad_port_bind = false, }, { - .nad_type = CRT_NA_UCX_RC, + .nad_type = CRT_PROV_UCX_RC, .nad_str = "ucx+rc_v", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_UD, + .nad_type = CRT_PROV_UCX_UD, .nad_str = "ucx+ud_v", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_RC_UD, + .nad_type = CRT_PROV_UCX_RC_UD, .nad_str = "ucx+rc_v,ud_v", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_RC_O, + .nad_type = CRT_PROV_UCX_RC_O, .nad_str = "ucx+rc", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_UD_O, + .nad_type = CRT_PROV_UCX_UD_O, .nad_str = "ucx+ud", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_RC_UD_O, + .nad_type = CRT_PROV_UCX_RC_UD_O, .nad_str = "ucx+rc,ud", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_RC_X, + .nad_type 
= CRT_PROV_UCX_RC_X, .nad_str = "ucx+rc_x", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_UD_X, + .nad_type = CRT_PROV_UCX_UD_X, .nad_str = "ucx+ud_x", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_RC_UD_X, + .nad_type = CRT_PROV_UCX_RC_UD_X, .nad_str = "ucx+rc_x,ud_x", .nad_contig_eps = true, .nad_port_bind = true, }, { - .nad_type = CRT_NA_UCX_DC_X, + .nad_type = CRT_PROV_UCX_DC_X, .nad_str = "ucx+dc_x", .nad_contig_eps = true, .nad_port_bind = true, @@ -110,7 +110,7 @@ struct crt_na_dict crt_na_dict[] = { }; int -crt_hg_parse_uri(const char *uri, enum crt_na_type *prov, char *addr) +crt_hg_parse_uri(const char *uri, crt_provider_t *prov, char *addr) { char copy_uri[CRT_ADDR_STR_MAX_LEN]; char *provider_str; @@ -137,7 +137,7 @@ crt_hg_parse_uri(const char *uri, enum crt_na_type *prov, char *addr) } if (prov) - *prov = crt_prov_str_to_na_type(provider_str); + *prov = crt_prov_str_to_prov(provider_str); if (addr) strncpy(addr, addr_str+2, CRT_ADDR_STR_MAX_LEN - 1); @@ -145,19 +145,19 @@ crt_hg_parse_uri(const char *uri, enum crt_na_type *prov, char *addr) return 0; } -enum crt_na_type -crt_prov_str_to_na_type(const char *prov_str) +crt_provider_t +crt_prov_str_to_prov(const char *prov_str) { int i; - for (i = 0; i < CRT_NA_COUNT; i++) { + for (i = 0; i < CRT_PROV_COUNT; i++) { if (strcmp(prov_str, crt_na_dict[i].nad_str) == 0 || (crt_na_dict[i].nad_alt_str && strcmp(prov_str, crt_na_dict[i].nad_alt_str) == 0)) return crt_na_dict[i].nad_type; } - return CRT_NA_UNKNOWN; + return CRT_PROV_UNKNOWN; } /** @@ -454,7 +454,7 @@ crt_provider_ctx0_port_get(int provider) { struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); - return prov_data->cpg_na_ofi_config.noc_port; + return prov_data->cpg_na_config.noc_port; } static char* @@ -462,7 +462,7 @@ crt_provider_domain_get(int provider) { struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); - return prov_data->cpg_na_ofi_config.noc_domain; + 
return prov_data->cpg_na_config.noc_domain; } char * @@ -476,13 +476,13 @@ crt_provider_ip_str_get(int provider) { struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); - return prov_data->cpg_na_ofi_config.noc_ip_str; + return prov_data->cpg_na_config.noc_ip_str; } static bool crt_provider_is_block_mode(int provider) { - if (provider == CRT_NA_OFI_PSM2) + if (provider == CRT_PROV_OFI_PSM2) return false; return true; @@ -569,7 +569,7 @@ crt_get_info_string(int provider, char **string, int ctx_idx) domain_str = crt_provider_domain_get(provider); ip_str = crt_provider_ip_str_get(provider); - if (provider == CRT_NA_SM) { + if (provider == CRT_PROV_SM) { D_ASPRINTF(*string, "%s://", provider_str); D_GOTO(out, 0); } @@ -714,13 +714,11 @@ crt_hg_class_init(int provider, int idx, hg_class_t **ret_hg_class) D_GOTO(out, rc = -DER_HG); } - if (crt_is_service()) { - rc = crt_hg_get_addr(hg_class, addr_str, &str_size); - if (rc != 0) { - D_ERROR("crt_hg_get_addr() failed, rc: %d.\n", rc); - HG_Finalize(hg_class); - D_GOTO(out, rc = -DER_HG); - } + rc = crt_hg_get_addr(hg_class, addr_str, &str_size); + if (rc != 0) { + D_ERROR("crt_hg_get_addr() failed, rc: %d.\n", rc); + HG_Finalize(hg_class); + D_GOTO(out, rc = -DER_HG); } D_DEBUG(DB_NET, "New context(idx:%d), listen address: %s.\n", diff --git a/src/cart/crt_hg.h b/src/cart/crt_hg.h index 3e614884e9a..95655404583 100644 --- a/src/cart/crt_hg.h +++ b/src/cart/crt_hg.h @@ -31,51 +31,26 @@ struct crt_rpc_priv; struct crt_common_hdr; struct crt_corpc_hdr; -/** type of NA plugin */ -enum crt_na_type { - CRT_NA_SM = 0, - CRT_NA_OFI_SOCKETS = 1, - CRT_NA_OFI_VERBS_RXM = 2, - CRT_NA_OFI_GNI = 3, - CRT_NA_OFI_PSM2 = 4, - CRT_NA_OFI_TCP_RXM = 5, - CRT_NA_OFI_CXI = 6, - CRT_NA_OFI_LAST = CRT_NA_OFI_CXI, - CRT_NA_UCX_RC = 7, - CRT_NA_UCX_UD = 8, - CRT_NA_UCX_RC_UD = 9, - CRT_NA_UCX_RC_O = 10, - CRT_NA_UCX_UD_O = 11, - CRT_NA_UCX_RC_UD_O = 12, - CRT_NA_UCX_RC_X = 13, - CRT_NA_UCX_UD_X = 14, - CRT_NA_UCX_RC_UD_X = 15, - 
CRT_NA_UCX_DC_X = 16, - - /* Note: This entry should be the last valid one in enum */ - CRT_NA_COUNT, - CRT_NA_UNKNOWN = -1, -}; -enum crt_na_type -crt_prov_str_to_na_type(const char *prov_str); +crt_provider_t +crt_prov_str_to_prov(const char *prov_str); int -crt_hg_parse_uri(const char *uri, enum crt_na_type *prov, char *addr); +crt_hg_parse_uri(const char *uri, crt_provider_t *prov, char *addr); static inline bool -crt_na_type_is_ucx(int na_type) +crt_provider_is_ucx(crt_provider_t prov) { - return (na_type >= CRT_NA_UCX_RC) && - (na_type < CRT_NA_COUNT); + return (prov >= CRT_PROV_UCX_RC) && + (prov <= CRT_PROV_UCX_LAST); } static inline bool -crt_na_type_is_ofi(int na_type) +crt_provider_is_ofi(crt_provider_t prov) { - return (na_type >= CRT_NA_OFI_SOCKETS) && - (na_type <= CRT_NA_OFI_LAST); + return (prov >= CRT_PROV_OFI_SOCKETS) && + (prov <= CRT_PROV_OFI_LAST); } struct crt_na_dict { diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 39f846d1a0c..5dd0fe28a6c 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -23,7 +23,8 @@ dump_envariables(void) { int i; char *val; - char *envars[] = {"CRT_PHY_ADDR_STR", "D_LOG_STDERR_IN_LOG", + char *envars[] = {"D_PROVIDER", "D_INTERFACE", "D_DOMAIN", "D_PORT", + "CRT_PHY_ADDR_STR", "D_LOG_STDERR_IN_LOG", "D_LOG_FILE", "D_LOG_FILE_APPEND_PID", "D_LOG_MASK", "DD_MASK", "DD_STDERR", "DD_SUBSYS", "CRT_TIMEOUT", "CRT_ATTACH_INFO_PATH", "OFI_PORT", "OFI_INTERFACE", "OFI_DOMAIN", "CRT_CREDIT_EP_CTX", @@ -38,6 +39,21 @@ dump_envariables(void) } } +static void +dump_opt(crt_init_options_t *opt) +{ + D_INFO("options:\n"); + D_INFO("crt_timeout = %d\n", opt->cio_crt_timeout); + D_INFO("max_ctx_num = %d\n", opt->cio_ctx_max_num); + D_INFO("swim_idx = %d\n", opt->cio_swim_crt_idx); + D_INFO("provider = %s\n", opt->cio_provider); + D_INFO("interface = %s\n", opt->cio_interface); + D_INFO("domain = %s\n", opt->cio_domain); +} + +static int +crt_na_config_init(crt_provider_t provider, char *interface, char *domain, 
char *port); + /* Workaround for CART-890 */ static void mem_pin_workaround(void) @@ -84,21 +100,54 @@ mem_pin_workaround(void) } static void -prov_data_init(struct crt_prov_gdata *prov_data, int provider, - bool sep_mode, int max_ctx_num, - uint32_t max_exp_size, uint32_t max_unexp_size) +prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, + bool primary, crt_init_options_t *opt) + { + bool share_addr = false; + bool set_sep = false; + uint32_t ctx_num = 0; + uint32_t max_expect_size = 0; + uint32_t max_unexpect_size = 0; + uint32_t max_num_ctx = 256; + + /* Assume for now this option is only available for a primary provider */ + if (primary) { + if (opt && opt->cio_sep_override) { + if (opt->cio_use_sep) + set_sep = true; + max_num_ctx = opt->cio_ctx_max_num; + } else { + share_addr = false; + ctx_num = 0; + + d_getenv_bool("CRT_CTX_SHARE_ADDR", &share_addr); + if (share_addr) + set_sep = true; + + d_getenv_int("CRT_CTX_NUM", &ctx_num); + max_num_ctx = ctx_num; + } + } + + if (opt && opt->cio_use_expected_size) + max_expect_size = opt->cio_max_expected_size; + + if (opt && opt->cio_use_unexpected_size) + max_unexpect_size = opt->cio_max_unexpected_size; + prov_data->cpg_inited = true; prov_data->cpg_provider = provider; prov_data->cpg_ctx_num = 0; - prov_data->cpg_sep_mode = sep_mode; + prov_data->cpg_sep_mode = set_sep; prov_data->cpg_contig_ports = true; - prov_data->cpg_ctx_max_num = max_ctx_num; - prov_data->cpg_max_exp_size = max_exp_size; - prov_data->cpg_max_unexp_size = max_unexp_size; + prov_data->cpg_ctx_max_num = max_num_ctx; + prov_data->cpg_max_exp_size = max_expect_size; + prov_data->cpg_max_unexp_size = max_unexpect_size; + prov_data->cpg_primary = primary; - D_DEBUG(DB_ALL, "Provider (%d), sep_mode (%d), sizes (%d/%d)\n", - provider, sep_mode, max_exp_size, max_unexp_size); + D_DEBUG(DB_ALL, "prov_idx: %d primary: %d sep_mode: %d sizes: (%d/%d)\n", + provider, primary, set_sep, max_expect_size, max_unexpect_size); 
D_INIT_LIST_HEAD(&prov_data->cpg_ctx_list); } @@ -132,7 +181,7 @@ static int data_init(int server, crt_init_options_t *opt) crt_gdata.cg_refcount = 0; crt_gdata.cg_inited = 0; - crt_gdata.cg_init_prov = CRT_NA_OFI_SOCKETS; + crt_gdata.cg_primary_prov = CRT_PROV_OFI_SOCKETS; d_srand(d_timeus_secdiff(0) + getpid()); start_rpcid = ((uint64_t)d_rand()) << 32; @@ -298,24 +347,152 @@ crt_plugin_fini(void) D_MUTEX_DESTROY(&crt_plugin_gdata.cpg_mutex); } +static int +__split_arg(char *s_arg_to_split, char **first_arg, char **second_arg) +{ + char *save_ptr = NULL; + char *arg_to_split; + + D_ASSERT(first_arg != NULL); + D_ASSERT(second_arg != NULL); + + /* no-op, not an error case */ + if (s_arg_to_split == NULL) { + *first_arg = NULL; + *second_arg = NULL; + return DER_SUCCESS; + } + + D_STRNDUP(arg_to_split, s_arg_to_split, 255); + if (!arg_to_split) { + *first_arg = NULL; + *second_arg = NULL; + return -DER_NOMEM; + } + + *first_arg = 0; + *second_arg = 0; + + *first_arg = strtok_r(arg_to_split, ",", &save_ptr); + *second_arg = save_ptr; + + return DER_SUCCESS; +} + + +int +crt_str_to_provider(const char *str_provider) +{ + int provider_idx = CRT_PROV_UNKNOWN; + int i; + + if (str_provider == NULL) + return provider_idx; + + for (i = 0; crt_na_dict[i].nad_str != NULL; i++) { + + if (!strncmp(str_provider, crt_na_dict[i].nad_str, + strlen(crt_na_dict[i].nad_str) + 1) || + (crt_na_dict[i].nad_alt_str && + !strncmp(str_provider, crt_na_dict[i].nad_alt_str, + strlen(crt_na_dict[i].nad_alt_str) + 1))) { + provider_idx = crt_na_dict[i].nad_type; + break; + } + } + + return provider_idx; +} + +static int +check_grpid(crt_group_id_t grpid) +{ + int rc = 0; + + if (grpid == NULL) + return rc; + + if (crt_validate_grpid(grpid) != 0) { + D_ERROR("grpid contains invalid characters " + "or is too long\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (strcmp(grpid, CRT_DEFAULT_GRPID) == 0) { + D_ERROR("invalid client grpid (same as " + "CRT_DEFAULT_GRPID).\n"); + D_GOTO(out, rc = 
-DER_INVAL); + } +out: + return rc; +} + +static int +prov_settings_apply(crt_provider_t prov, crt_init_options_t *opt) +{ + char *srx_env; + int rc = 0; + + /* rxm and verbs providers only works with regular EP */ + if ((prov == CRT_PROV_OFI_VERBS_RXM || + prov == CRT_PROV_OFI_TCP_RXM) && + crt_provider_is_sep(prov)) { + D_WARN("set CRT_CTX_SHARE_ADDR as 1 is invalid " + "for current provider, ignoring it.\n"); + crt_provider_set_sep(prov, false); + } + + if (prov == CRT_PROV_OFI_VERBS_RXM || + prov == CRT_PROV_OFI_TCP_RXM) { + + srx_env = getenv("FI_OFI_RXM_USE_SRX"); + if (srx_env == NULL) { + D_INFO("FI_OFI_RXM_USE_SRX not set, set=1\n"); + setenv("FI_OFI_RXM_USE_SRX", "1", true); + } + } + + /* Print notice that "ofi+psm2" will be deprecated*/ + if (prov == CRT_PROV_OFI_PSM2) { + D_WARN("\"ofi+psm2\" will be deprecated soon.\n"); + setenv("FI_PSM2_NAME_SERVER", "1", true); + D_DEBUG(DB_ALL, "Setting FI_PSM2_NAME_SERVER to 1\n"); + } + + + return rc; +} + int crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) { - char *addr_env; + char *provider_env; + char *interface_env; + char *domain_env; + char *tmp; struct timeval now; unsigned int seed; const char *path; bool server; - bool provider_found = false; - int plugin_idx; - int prov; - bool set_sep = false; - int max_num_ctx = 256; - uint32_t ctx_num; - bool share_addr; int rc = 0; + char *provider_str0 = NULL; + char *provider_str1 = NULL; + crt_provider_t primary_provider; + crt_provider_t secondary_provider; + crt_provider_t tmp_prov; + char *port_str, *port0, *port1; + char *iface0, *iface1, *domain0, *domain1; + int num_secondaries = 0; + int i; server = flags & CRT_FLAG_BIT_SERVER; + port_str = NULL; + port0 = NULL; + port1 = NULL; + iface0 = NULL; + iface1 = NULL; + domain0 = NULL; + domain1 = NULL; /* d_log_init is reference counted */ rc = d_log_init(); @@ -328,6 +505,9 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_INFO("libcart version %s 
initializing\n", CART_VERSION); + if (opt) + dump_opt(opt); + /* d_fault_inject_init() is reference counted */ rc = d_fault_inject_init(); if (rc != DER_SUCCESS && rc != -DER_NOSYS) { @@ -335,19 +515,10 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_GOTO(out, rc); } - if (grpid != NULL) { - if (crt_validate_grpid(grpid) != 0) { - D_ERROR("grpid contains invalid characters " - "or is too long\n"); - D_GOTO(out, rc = -DER_INVAL); - } - - if (strcmp(grpid, CRT_DEFAULT_GRPID) == 0) { - D_ERROR("invalid client grpid (same as " - "CRT_DEFAULT_GRPID).\n"); - D_GOTO(out, rc = -DER_INVAL); - } - } + /* check the group name */ + rc = check_grpid(grpid); + if (rc != DER_SUCCESS) + D_GOTO(out, rc); if (gdata_init_flag == 0) { rc = data_init(server, opt); @@ -369,10 +540,6 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) crt_gdata.cg_auto_swim_disable = (flags & CRT_FLAG_BIT_AUTO_SWIM_DISABLE) ? 1 : 0; - D_DEBUG(DB_ALL, "Server bit set to %d\n", server); - D_DEBUG(DB_ALL, "Swim auto disable set to %d\n", - crt_gdata.cg_auto_swim_disable); - path = getenv("CRT_ATTACH_INFO_PATH"); if (path != NULL && strlen(path) > 0) { rc = crt_group_config_path_set(path); @@ -385,102 +552,117 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) } if (opt && opt->cio_provider) - addr_env = opt->cio_provider; - else - addr_env = (crt_phy_addr_t)getenv(CRT_PHY_ADDR_ENV); + provider_env = opt->cio_provider; + else { + provider_env = getenv(CRT_PHY_ADDR_ENV); - if (addr_env == NULL) { - D_DEBUG(DB_ALL, "ENV %s not found.\n", CRT_PHY_ADDR_ENV); - goto do_init; - } else { - D_DEBUG(DB_ALL, "EVN %s: %s.\n", CRT_PHY_ADDR_ENV, addr_env); + tmp = getenv("D_PROVIDER"); + if (tmp) + provider_env = tmp; } - provider_found = false; - for (plugin_idx = 0; crt_na_dict[plugin_idx].nad_str != NULL; - plugin_idx++) { - if (!strncmp(addr_env, crt_na_dict[plugin_idx].nad_str, - strlen(crt_na_dict[plugin_idx].nad_str) + 1) || - 
(crt_na_dict[plugin_idx].nad_alt_str && - !strncmp(addr_env, crt_na_dict[plugin_idx].nad_alt_str, - strlen(crt_na_dict[plugin_idx].nad_alt_str) + 1))) { - provider_found = true; - crt_gdata.cg_init_prov = - crt_na_dict[plugin_idx].nad_type; - break; - } + if (opt && opt->cio_interface) + interface_env = opt->cio_interface; + else { + interface_env = getenv("OFI_INTERFACE"); + + tmp = getenv("D_INTERFACE"); + if (tmp) + interface_env = tmp; } - if (!provider_found) { - D_ERROR("Requested provider %s not found\n", addr_env); - D_GOTO(out, rc = -DER_NONEXIST); + if (opt && opt->cio_domain) + domain_env = opt->cio_domain; + else { + domain_env = getenv("OFI_DOMAIN"); + + tmp = getenv("D_DOMAIN"); + if (tmp) + domain_env = tmp; } -do_init: - prov = crt_gdata.cg_init_prov; - if (opt && opt->cio_sep_override) { - if (opt->cio_use_sep) - set_sep = true; - max_num_ctx = opt->cio_ctx_max_num; - } else { - share_addr = false; - ctx_num = 0; + if (domain_env == NULL) { + D_DEBUG(DB_ALL, "OFI_DOMAIN is not set. 
Setting it to %s\n", interface_env); + domain_env = interface_env; + } - d_getenv_bool("CRT_CTX_SHARE_ADDR", &share_addr); - if (share_addr) - set_sep = true; - d_getenv_int("CRT_CTX_NUM", &ctx_num); - max_num_ctx = ctx_num; + if (opt && opt->cio_port) + port_str = opt->cio_port; + else { + port_str = getenv("OFI_PORT"); + + tmp = getenv("D_PORT"); + if (tmp) + port_str = tmp; } - uint32_t max_expect_size = 0; - uint32_t max_unexpect_size = 0; + rc = __split_arg(provider_env, &provider_str0, &provider_str1); + if (rc != 0) + D_GOTO(out, rc); - if (opt && opt->cio_use_expected_size) - max_expect_size = opt->cio_max_expected_size; + primary_provider = crt_str_to_provider(provider_str0); + secondary_provider = crt_str_to_provider(provider_str1); - if (opt && opt->cio_use_unexpected_size) - max_unexpect_size = opt->cio_max_unexpected_size; + if (primary_provider == CRT_PROV_UNKNOWN) { + D_ERROR("Requested provider %s not found\n", provider_env); + D_GOTO(out, rc = -DER_NONEXIST); + } - prov_data_init(&crt_gdata.cg_prov_gdata[prov], - prov, set_sep, max_num_ctx, - max_expect_size, max_unexpect_size); + rc = __split_arg(interface_env, &iface0, &iface1); + if (rc != 0) + D_GOTO(out, rc); + rc = __split_arg(domain_env, &domain0, &domain1); + if (rc != 0) + D_GOTO(out, rc); + rc = __split_arg(port_str, &port0, &port1); + if (rc != 0) + D_GOTO(out, rc); - /* rxm and verbs providers only works with regular EP */ - if ((prov == CRT_NA_OFI_VERBS_RXM || - prov == CRT_NA_OFI_TCP_RXM) && - crt_provider_is_sep(prov)) { - D_WARN("set CRT_CTX_SHARE_ADDR as 1 is invalid " - "for current provider, ignoring it.\n"); - crt_provider_set_sep(prov, false); + if (iface0 == NULL) { + D_ERROR("Empty interface specified\n"); + D_GOTO(out, rc = -DER_INVAL); } - if (prov == CRT_NA_OFI_VERBS_RXM || - prov == CRT_NA_OFI_TCP_RXM) { - char *srx_env; + prov_data_init(&crt_gdata.cg_prov_gdata[primary_provider], + primary_provider, true, opt); + prov_settings_apply(primary_provider, opt); + 
crt_gdata.cg_primary_prov = primary_provider; - srx_env = getenv("FI_OFI_RXM_USE_SRX"); - if (srx_env == NULL) { - D_INFO("FI_OFI_RXM_USE_SRX not set, set=1\n"); - setenv("FI_OFI_RXM_USE_SRX", "1", true); - } + rc = crt_na_config_init(primary_provider, iface0, domain0, port0); + if (rc != 0) { + D_ERROR("crt_na_config_init() failed, "DF_RC"\n", DP_RC(rc)); + D_GOTO(out, rc); } - /* Print notice that "ofi+psm2" will be deprecated*/ - if (prov == CRT_NA_OFI_PSM2) { - D_WARN("\"ofi+psm2\" will be deprecated soon.\n"); - setenv("FI_PSM2_NAME_SERVER", "1", true); - D_DEBUG(DB_ALL, "Setting FI_PSM2_NAME_SERVER to 1\n"); + if (secondary_provider != CRT_PROV_UNKNOWN) { + num_secondaries = 1; + + if (port1 == NULL || port1[0] == '\0') { + port1 = port0; + } + + D_ALLOC_ARRAY(crt_gdata.cg_secondary_provs, num_secondaries); + if (crt_gdata.cg_secondary_provs == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + crt_gdata.cg_secondary_provs[0] = secondary_provider; } - if (crt_na_type_is_ofi(prov) || crt_na_type_is_ucx(prov)) { - rc = crt_na_ofi_config_init(prov, opt); + + for (i = 0; i < num_secondaries; i++) { + tmp_prov = crt_gdata.cg_secondary_provs[i]; + + prov_data_init(&crt_gdata.cg_prov_gdata[tmp_prov], + tmp_prov, false, opt); + prov_settings_apply(tmp_prov, opt); + + rc = crt_na_config_init(tmp_prov, iface1, domain1, port1); if (rc != 0) { - D_ERROR("crt_na_ofi_config_init() failed, " - DF_RC"\n", DP_RC(rc)); + D_ERROR("crt_na_config_init() failed, "DF_RC"\n", DP_RC(rc)); D_GOTO(out, rc); } } + crt_gdata.cg_num_secondary_provs = num_secondaries; rc = crt_hg_init(); if (rc != 0) { @@ -490,16 +672,14 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) rc = crt_grp_init(grpid); if (rc != 0) { - D_ERROR("crt_grp_init() failed, "DF_RC"\n", - DP_RC(rc)); + D_ERROR("crt_grp_init() failed, "DF_RC"\n", DP_RC(rc)); D_GOTO(cleanup, rc); } if (crt_plugin_gdata.cpg_inited == 0) { rc = crt_plugin_init(); if (rc != 0) { - D_ERROR("crt_plugin_init() failed, 
"DF_RC"\n", - DP_RC(rc)); + D_ERROR("crt_plugin_init() failed, "DF_RC"\n", DP_RC(rc)); D_GOTO(cleanup, rc); } } @@ -508,15 +688,13 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) rc = crt_opc_map_create(); if (rc != 0) { - D_ERROR("crt_opc_map_create() failed, "DF_RC"\n", - DP_RC(rc)); + D_ERROR("crt_opc_map_create() failed, "DF_RC"\n", DP_RC(rc)); D_GOTO(self_test, rc); } rc = crt_internal_rpc_register(server); if (rc != 0) { - D_ERROR("crt_internal_rpc_register() failed, "DF_RC"\n", - DP_RC(rc)); + D_ERROR("crt_internal_rpc_register() failed, "DF_RC"\n", DP_RC(rc)); D_GOTO(self_test, rc); } @@ -525,8 +703,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) crt_gdata.cg_inited = 1; } else { if (crt_gdata.cg_server == false && server == true) { - D_ERROR("CRT initialized as client, cannot set as " - "server again.\n"); + D_ERROR("CRT initialized as client, cannot set as server again.\n"); D_GOTO(unlock, rc = -DER_INVAL); } } @@ -547,12 +724,20 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) if (crt_gdata.cg_opc_map != NULL) crt_opc_map_destroy(crt_gdata.cg_opc_map); - crt_na_ofi_config_fini(crt_gdata.cg_init_prov); + crt_na_config_fini(crt_gdata.cg_primary_prov); unlock: D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); out: + /* + * We don't need to free port1, iface1 and domain1 as + * they occupy the same original string as port0, iface0 and domain0 + */ + D_FREE(port0); + D_FREE(iface0); + D_FREE(domain0); + D_FREE(provider_str0); if (rc != 0) { D_ERROR("failed, "DF_RC"\n", DP_RC(rc)); d_fault_inject_fini(); @@ -572,7 +757,7 @@ crt_finalize(void) { int local_rc; int rc = 0; - + int i; struct crt_prov_gdata *prov_data; D_RWLOCK_WRLOCK(&crt_gdata.cg_rwlock); @@ -588,10 +773,10 @@ crt_finalize(void) crt_self_test_fini(); /* TODO: Needs to happen for every initialized provider */ - prov_data = &crt_gdata.cg_prov_gdata[crt_gdata.cg_init_prov]; + prov_data = 
&crt_gdata.cg_prov_gdata[crt_gdata.cg_primary_prov]; if (prov_data->cpg_ctx_num > 0) { - D_ASSERT(!crt_context_empty(crt_gdata.cg_init_prov, + D_ASSERT(!crt_context_empty(crt_gdata.cg_primary_prov, CRT_LOCKED)); D_ERROR("cannot finalize, current ctx_num(%d).\n", prov_data->cpg_ctx_num); @@ -599,7 +784,7 @@ crt_finalize(void) D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); D_GOTO(out, rc = -DER_BUSY); } else { - D_ASSERT(crt_context_empty(crt_gdata.cg_init_prov, + D_ASSERT(crt_context_empty(crt_gdata.cg_primary_prov, CRT_LOCKED)); } @@ -633,7 +818,12 @@ crt_finalize(void) crt_gdata.cg_inited = 0; gdata_init_flag = 0; - crt_na_ofi_config_fini(crt_gdata.cg_init_prov); + crt_na_config_fini(crt_gdata.cg_primary_prov); + + if (crt_gdata.cg_secondary_provs != NULL) { + for (i = 0; i < crt_gdata.cg_num_secondary_provs; i++) + crt_na_config_fini(crt_gdata.cg_secondary_provs[i]); + } } else { D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); } @@ -655,7 +845,7 @@ crt_finalize(void) static inline bool is_integer_str(char *str) { - char *p; + const char *p; p = str; if (p == NULL || strlen(p) == 0) @@ -745,152 +935,125 @@ crt_port_range_verify(int port) } } -int crt_na_ofi_config_init(int provider, crt_init_options_t *opt) + +static int +crt_na_fill_ip_addr(struct crt_na_config *na_cfg) { - char *port_str; - char *interface; - int port; struct ifaddrs *if_addrs = NULL; struct ifaddrs *ifa = NULL; void *tmp_ptr; const char *ip_str = NULL; - char *domain = NULL; int rc = 0; - struct crt_na_ofi_config *na_ofi_cfg; - - na_ofi_cfg = &crt_gdata.cg_prov_gdata[provider].cpg_na_ofi_config; - - if (opt && opt->cio_interface) - interface = opt->cio_interface; - else - interface = getenv("OFI_INTERFACE"); - - if (interface != NULL && strlen(interface) > 0) { - D_STRNDUP(na_ofi_cfg->noc_interface, interface, 64); - if (na_ofi_cfg->noc_interface == NULL) - D_GOTO(out, rc = -DER_NOMEM); - } else { - na_ofi_cfg->noc_interface = NULL; - D_ERROR("ENV OFI_INTERFACE not set."); - D_GOTO(out, rc = -DER_INVAL); - } - 
- if (opt && opt->cio_domain) - domain = opt->cio_domain; - else - domain = getenv("OFI_DOMAIN"); - - if (domain == NULL) { - D_DEBUG(DB_ALL, "OFI_DOMAIN is not set. Setting it to %s\n", - interface); - if (provider == CRT_NA_OFI_VERBS_RXM || - provider == CRT_NA_OFI_CXI) - D_WARN("Domain and interface name expected to be different " - "for verbs/cxi, it might fail without specifying OFI_DOMAIN\n"); - domain = interface; - } - - D_STRNDUP(na_ofi_cfg->noc_domain, domain, 64); - if (!na_ofi_cfg->noc_domain) - D_GOTO(out, rc = -DER_NOMEM); - rc = getifaddrs(&if_addrs); if (rc != 0) { - D_ERROR("cannot getifaddrs, errno: %d(%s).\n", - errno, strerror(errno)); + D_ERROR("cannot getifaddrs, errno: %d(%s).\n", errno, strerror(errno)); D_GOTO(out, rc = -DER_PROTO); } for (ifa = if_addrs; ifa != NULL; ifa = ifa->ifa_next) { - if (strcmp(ifa->ifa_name, na_ofi_cfg->noc_interface)) - continue; if (ifa->ifa_addr == NULL) continue; - memset(na_ofi_cfg->noc_ip_str, 0, INET_ADDRSTRLEN); + if (strcmp(ifa->ifa_name, na_cfg->noc_interface)) + continue; + + memset(na_cfg->noc_ip_str, 0, INET_ADDRSTRLEN); + if (ifa->ifa_addr->sa_family == AF_INET) { /* check it is a valid IPv4 Address */ - tmp_ptr = - &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; - ip_str = inet_ntop(AF_INET, tmp_ptr, - na_ofi_cfg->noc_ip_str, - INET_ADDRSTRLEN); + tmp_ptr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; + ip_str = inet_ntop(AF_INET, tmp_ptr, na_cfg->noc_ip_str, INET_ADDRSTRLEN); if (ip_str == NULL) { - D_ERROR("inet_ntop failed, errno: %d(%s).\n", - errno, strerror(errno)); + D_ERROR("inet_ntop errno: %d(%s).\n", errno, strerror(errno)); freeifaddrs(if_addrs); D_GOTO(out, rc = -DER_PROTO); } - /* - * D_DEBUG("Get interface %s IPv4 Address %s\n", - * ifa->ifa_name, na_ofi_conf.noc_ip_str); - */ break; } else if (ifa->ifa_addr->sa_family == AF_INET6) { /* check it is a valid IPv6 Address */ /* * tmp_ptr = * &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; - * inet_ntop(AF_INET6, tmp_ptr, 
na_ofi_conf.noc_ip_str, + * inet_ntop(AF_INET6, tmp_ptr, na_conf.noc_ip_str, * INET6_ADDRSTRLEN); * D_DEBUG("Get %s IPv6 Address %s\n", - * ifa->ifa_name, na_ofi_conf.noc_ip_str); + * ifa->ifa_name, na_conf.noc_ip_str); */ } } freeifaddrs(if_addrs); if (ip_str == NULL) { - D_ERROR("no IP addr found on interface %s\n", interface); + D_ERROR("no IP addr found on interface %s\n", na_cfg->noc_interface); D_GOTO(out, rc = -DER_PROTO); } - port = -1; +out: + return rc; +} - if (opt && opt->cio_port) - port_str = opt->cio_port; - else - port_str = getenv("OFI_PORT"); +static int +crt_na_config_init(crt_provider_t provider, char *interface, char *domain, char *port_str) +{ + struct crt_na_config *na_cfg; + int rc = 0; + int port = -1; + + if (provider == CRT_PROV_SM) + return 0; + + na_cfg = &crt_gdata.cg_prov_gdata[provider].cpg_na_config; + D_STRNDUP(na_cfg->noc_interface, interface, 64); + if (!na_cfg->noc_interface) + D_GOTO(out, rc = -DER_NOMEM); + + if (domain) { + D_STRNDUP(na_cfg->noc_domain, domain, 64); + if (!na_cfg->noc_domain) + D_GOTO(out, rc = -DER_NOMEM); + } + /* Fail init when the interface address cannot be resolved */ + rc = crt_na_fill_ip_addr(na_cfg); + if (rc != 0) + D_GOTO(out, rc); if (crt_is_service() && port_str != NULL && strlen(port_str) > 0) { if (!is_integer_str(port_str)) { - D_DEBUG(DB_ALL, "ignoring invalid OFI_PORT %s.", - port_str); + D_DEBUG(DB_ALL, "ignoring invalid OFI_PORT %s.", port_str); } else { port = atoi(port_str); - if (provider == CRT_NA_OFI_SOCKETS || - provider == CRT_NA_OFI_VERBS_RXM || - provider == CRT_NA_OFI_TCP_RXM) + if (provider == CRT_PROV_OFI_SOCKETS || + provider == CRT_PROV_OFI_VERBS_RXM || + provider == CRT_PROV_OFI_TCP_RXM) crt_port_range_verify(port); - if (provider == CRT_NA_OFI_PSM2) + if (provider == CRT_PROV_OFI_PSM2) port = (uint16_t)port << 8; - D_DEBUG(DB_ALL, "OFI_PORT %d, using it as service " - "port.\n", port); + D_DEBUG(DB_ALL, "OFI_PORT %d, using it as service port.\n", port); } - } else if (provider == 
CRT_NA_OFI_PSM2) { + } else if (provider == CRT_PROV_OFI_PSM2) { rc = crt_get_port_psm2(&port); 
if (rc != 0) { D_ERROR("crt_get_port failed, rc: %d.\n", rc); D_GOTO(out, rc); } } - na_ofi_cfg->noc_port = port; + na_cfg->noc_port = port; out: if (rc != -DER_SUCCESS) { - D_FREE(na_ofi_cfg->noc_interface); - D_FREE(na_ofi_cfg->noc_domain); + D_FREE(na_cfg->noc_interface); + D_FREE(na_cfg->noc_domain); } return rc; } -void crt_na_ofi_config_fini(int provider) +void crt_na_config_fini(crt_provider_t provider) { - struct crt_na_ofi_config *na_ofi_cfg; + struct crt_na_config *na_cfg; - na_ofi_cfg = &crt_gdata.cg_prov_gdata[provider].cpg_na_ofi_config; - D_FREE(na_ofi_cfg->noc_interface); - D_FREE(na_ofi_cfg->noc_domain); - na_ofi_cfg->noc_port = 0; + na_cfg = &crt_gdata.cg_prov_gdata[provider].cpg_na_config; + D_FREE(na_cfg->noc_interface); + D_FREE(na_cfg->noc_domain); + na_cfg->noc_port = 0; } diff --git a/src/cart/crt_internal_fns.h b/src/cart/crt_internal_fns.h index 0c8e1f5d478..f0388955fc1 100644 --- a/src/cart/crt_internal_fns.h +++ b/src/cart/crt_internal_fns.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2021 Intel Corporation. + * (C) Copyright 2016-2022 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -14,6 +14,8 @@ /** crt_init.c */ bool crt_initialized(void); +int crt_str_to_provider(const char *provider); + /** crt_register.c */ int crt_opc_map_create(void); void crt_opc_map_destroy(struct crt_opc_map *map); diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 8c71e07182b..742f578adaa 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -26,7 +26,7 @@ struct crt_hg_gdata; struct crt_grp_gdata; -struct crt_na_ofi_config { +struct crt_na_config { int32_t noc_port; char *noc_interface; char *noc_domain; @@ -38,7 +38,7 @@ struct crt_prov_gdata { /** NA plugin type */ int cpg_provider; - struct crt_na_ofi_config cpg_na_ofi_config; + struct crt_na_config cpg_na_config; /** Context0 URI */ char cpg_addr[CRT_ADDR_STR_MAX_LEN]; @@ -55,6 +55,7 @@ struct crt_prov_gdata { /** Set of flags */ unsigned int cpg_sep_mode : 1, + cpg_primary : 1, cpg_contig_ports : 1, cpg_inited : 1; }; @@ -62,11 +63,13 @@ struct crt_prov_gdata { /* CaRT global data */ struct crt_gdata { - /** Provider initialized at crt_init() time */ - int cg_init_prov; + /** Providers initialized at crt_init() time */ + int cg_primary_prov; + int cg_num_secondary_provs; /* number of entries in cg_secondary_provs */ + int *cg_secondary_provs; /* secondary provider ids, may be NULL */ /** Provider specific data */ - struct crt_prov_gdata cg_prov_gdata[CRT_NA_COUNT]; + struct crt_prov_gdata cg_prov_gdata[CRT_PROV_COUNT]; /** global timeout value (second) for all RPCs */ uint32_t cg_timeout; @@ -167,6 +170,7 @@ struct crt_context { d_list_t cc_link; /** link to gdata.cg_ctx_list */ int cc_idx; /** context index */ struct crt_hg_context cc_hg_ctx; /** HG context */ + bool cc_primary; /** primary provider flag */ /* callbacks */ void *cc_rpc_cb_arg; @@ -283,7 +287,6 @@ struct crt_opc_map { }; -int crt_na_ofi_config_init(int provider, crt_init_options_t *opt); -void crt_na_ofi_config_fini(int provider); +void 
crt_na_config_fini(int provider); #endif /* __CRT_INTERNAL_TYPES_H__ */ diff 
--git a/src/cart/crt_swim.c b/src/cart/crt_swim.c index 1b4249d17c7..be4474420b3 100644 --- a/src/cart/crt_swim.c +++ b/src/cart/crt_swim.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2021 Intel Corporation. + * (C) Copyright 2019-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -742,7 +742,7 @@ static void crt_swim_update_last_unpack_hlc(struct crt_swim_membs *csm) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_init_prov); + ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { uint64_t hlc = ctx->cc_last_unpack_hlc; diff --git a/src/include/cart/api.h b/src/include/cart/api.h index ab5d7dc20a9..9814c49ceb8 100644 --- a/src/include/cart/api.h +++ b/src/include/cart/api.h @@ -83,6 +83,29 @@ crt_init(crt_group_id_t grpid, uint32_t flags) int crt_context_create(crt_context_t *crt_ctx); +/** + * Create CRT transport context on one of secondary providers. + * + * \param[out] crt_ctx created CRT transport context + * \param[in] idx Currently unused. Specifies which + * of the secondary providers to use. + * + * \return DER_SUCCESS on success, negative value if error + */ +int +crt_context_create_secondary(crt_context_t *crt_ctx, int idx); + +/** + * Check whether specified context is primary or secondary. + * + * \param[in] crt_ctx CRT transport context + * + * \return true if primary, false otherwise + */ +bool +crt_context_is_primary(crt_context_t crt_ctx); + + /** * Set the timeout value for all RPC requests created on the specified context. * Setting the timeout after crt_req_create() call will not affect already @@ -174,6 +197,17 @@ crt_context_idx(crt_context_t crt_ctx, int *ctx_idx); int crt_context_num(int *ctx_num); +/** + * Return URI associated with the context. + * + * \param[in] crt_ctx CRT transport context + * \param[out] uri Returned uri. + * + * \return DER_SUCCESS on success, negative value in error. 
+ */ +int +crt_context_uri_get(crt_context_t crt_ctx, char **uri); + /** * Finalize CRT transport layer. Must be called on both the server side and * client side before exit. This function is reference counted. diff --git a/src/include/cart/types.h b/src/include/cart/types.h index 7ea98fb0873..cdcfa4a6a4d 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -88,6 +88,35 @@ typedef struct crt_init_options { } crt_init_options_t; +/** + * Enumeration specifying providers supported by the library + */ +typedef enum { + CRT_PROV_SM = 0, + CRT_PROV_OFI_SOCKETS = 1, + CRT_PROV_OFI_VERBS_RXM = 2, + CRT_PROV_OFI_GNI = 3, + CRT_PROV_OFI_PSM2 = 4, + CRT_PROV_OFI_TCP_RXM = 5, + CRT_PROV_OFI_CXI = 6, + CRT_PROV_OFI_LAST = CRT_PROV_OFI_CXI, + CRT_PROV_UCX_RC = 7, + CRT_PROV_UCX_UD = 8, + CRT_PROV_UCX_RC_UD = 9, + CRT_PROV_UCX_RC_O = 10, + CRT_PROV_UCX_UD_O = 11, + CRT_PROV_UCX_RC_UD_O = 12, + CRT_PROV_UCX_RC_X = 13, + CRT_PROV_UCX_UD_X = 14, + CRT_PROV_UCX_RC_UD_X = 15, + CRT_PROV_UCX_DC_X = 16, + CRT_PROV_UCX_LAST = CRT_PROV_UCX_DC_X, + /* Note: This entry should be the last valid one in enum */ + CRT_PROV_COUNT, + CRT_PROV_UNKNOWN = -1, +} crt_provider_t; + + typedef int crt_status_t; /** * CRT uses a string as the group ID diff --git a/src/tests/ftest/cart/SConscript b/src/tests/ftest/cart/SConscript index f7aec23abb1..d53931a178d 100644 --- a/src/tests/ftest/cart/SConscript +++ b/src/tests/ftest/cart/SConscript @@ -41,6 +41,7 @@ import os import daos_build SIMPLE_TEST_SRC = ['threaded_client.c', 'dual_iface_server.c', + 'dual_provider_server.c', 'dual_provider_client.c', 'no_pmix_multi_ctx.c', 'threaded_server.c', 'test_corpc_prefwd.c', 'test_corpc_exclusive.c', diff --git a/src/tests/ftest/cart/dual_provider_client.c b/src/tests/ftest/cart/dual_provider_client.c new file mode 100644 index 00000000000..c73a5375575 --- /dev/null +++ b/src/tests/ftest/cart/dual_provider_client.c @@ -0,0 +1,303 @@ +/* + * (C) Copyright 2018-2022 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * Dual-provider client + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "crt_utils.h" +#include "dual_provider_common.h" + +static int +g_do_shutdown; + +static void * +progress_function(void *data) +{ + crt_context_t *p_ctx = (crt_context_t *)data; + + while (g_do_shutdown == 0) + crt_progress(*p_ctx, 1000); + + crt_context_destroy(*p_ctx, 1); + + return NULL; +} + +static void +rpc_handle_reply(const struct crt_cb_info *info) +{ + sem_t *sem; + + D_ASSERTF(info->cci_rc == 0, "rpc response failed. rc: %d\n", + info->cci_rc); + + sem = (sem_t *)info->cci_arg; + sem_post(sem); +} + +int main(int argc, char **argv) +{ + crt_context_t crt_ctx; + crt_group_t *grp; + int rc; + sem_t sem; + pthread_t progress_thread; + crt_rpc_t *rpc = NULL; + struct RPC_PING_in *input; + crt_endpoint_t server_ep; + int i; + d_rank_list_t *rank_list; + d_rank_t rank; + int tag; + uint32_t grp_size; + int c; /* getopt() returns int */ + char *arg_interface = NULL; + char *arg_domain = NULL; + char *arg_provider = NULL; + char *arg_num_ctx = NULL; + int num_remote_tags; + bool use_primary = true; + + while ((c = getopt(argc, argv, "i:p:d:c:s")) != -1) { + switch (c) { + case 'i': + arg_interface = optarg; + break; + case 'd': + arg_domain = optarg; + break; + case 'p': + arg_provider = optarg; + break; + case 'c': + arg_num_ctx = optarg; + break; + case 's': + use_primary = false; + break; + default: + printf("Error: unknown option %c\n", c); + return -1; + } + } + + rc = d_log_init(); + assert(rc == 0); + /* rank, num_attach_retries, is_server, assert_on_error */ + crtu_test_init(0, 20, false, true); + + DBG_PRINT("Client starting up\n"); + + rc = sem_init(&sem, 0, 0); + if (rc != 0) { + D_ERROR("sem_init() failed; rc=%d\n", rc); + assert(0); + } + + num_remote_tags = 1; + if (arg_num_ctx != NULL) + num_remote_tags = atoi(arg_num_ctx); + + 
DBG_PRINT("------------------------------------\n"); + DBG_PRINT("Provider: '%s' Interface: '%s' Domain: '%s'\n", + arg_provider, arg_interface, arg_domain); + DBG_PRINT("Number of remote tags: %d\n", num_remote_tags); + DBG_PRINT("Primary_provider: %d\n", use_primary); + DBG_PRINT("------------------------------------\n"); + crt_init_options_t init_opts = {0}; + + init_opts.cio_provider = arg_provider; + init_opts.cio_interface = arg_interface; + init_opts.cio_domain = arg_domain; + + rc = crt_init_opt(NULL, 0, &init_opts); + if (rc != 0) { + D_ERROR("crt_init() failed; rc=%d\n", rc); + assert(0); + } + + rc = crt_proto_register(&my_proto_fmt); + if (rc != 0) { + D_ERROR("crt_proto_register() failed; rc=%d\n", rc); + assert(0); + } + + rc = crt_context_create(&crt_ctx); + if (rc != 0) { + D_ERROR("crt_context_create() failed; rc=%d\n", rc); + assert(0); + } + + rc = pthread_create(&progress_thread, 0, + progress_function, &crt_ctx); + assert(rc == 0); + + int num_servers; + + rc = crt_group_view_create(SERVER_GROUP_NAME, &grp); + if (rc != 0) { + error_exit(); + } + + num_servers = 2; + + /* Parse /tmp/ files for uris. servers generate those */ + { + FILE *f; + char *filename; + char pri_uri0[255]; + char sec_uri0[255]; + int serv_rank; + + + for (serv_rank = 0; serv_rank < num_servers; serv_rank++) { + D_ASPRINTF(filename, "/tmp/%s_rank_%d_uris.cart", + SERVER_GROUP_NAME, serv_rank); + if (filename == NULL) + error_exit(); + + f = fopen(filename, "r"); + if (f == NULL) { + perror("failed: "); + error_exit(); + } + + rc = fscanf(f, "%254s", pri_uri0); + if (rc == EOF) + error_exit(); + + rc = fscanf(f, "%254s", sec_uri0); + if (rc == EOF) + error_exit(); + + printf("server_rank=%d\n", serv_rank); + printf("pri_uri=%s\n", pri_uri0); + printf("sec_uri=%s\n", sec_uri0); + + printf("Using %s URIs for ranks\n", + (use_primary) ? "primary" : "secondary"); + rc = crt_group_primary_rank_add(crt_ctx, grp, serv_rank, + (use_primary) ? 
pri_uri0 : sec_uri0); + fclose(f); + D_FREE(filename); + } + } + + /* Load group */ + rc = crt_group_size(grp, &grp_size); + if (rc != 0) { + D_ERROR("crt_group_size() failed; rc=%d\n", rc); + assert(0); + } + + rc = crt_group_ranks_get(grp, &rank_list); + if (rc != 0) { + D_ERROR("crt_group_ranks_get() failed; rc=%d\n", rc); + assert(0); + } + + DBG_PRINT("Group loaded, group size=%d\n", grp_size); + if (rank_list->rl_nr != grp_size) { + D_ERROR("rank_list differs in size. expected %d got %d\n", + grp_size, rank_list->rl_nr); + assert(0); + } + + + /* Cycle through all ranks and 8 tags and send rpc to each */ + for (i = 0; i < rank_list->rl_nr; i++) { + + rank = rank_list->rl_ranks[i]; + + for (tag = 0; tag < num_remote_tags; tag++) { + DBG_PRINT("Sending ping to %d:%d\n", rank, tag); + + server_ep.ep_rank = rank; + server_ep.ep_tag = tag; + server_ep.ep_grp = grp; + + rc = crt_req_create(crt_ctx, &server_ep, + RPC_PING, &rpc); + if (rc != 0) { + D_ERROR("crt_req_create() failed; rc=%d\n", + rc); + assert(0); + } + + input = crt_req_get(rpc); + + input->size1 = 1024; + input->size2 = 10; + rc = crt_req_send(rpc, rpc_handle_reply, &sem); + crtu_sem_timedwait(&sem, 10, __LINE__); + DBG_PRINT("Ping response from %d:%d\n", rank, tag); + } + } + + + /* Send shutdown RPC to each server */ + bool send_shutdown = false; + + + if (send_shutdown) { + for (i = 0; i < rank_list->rl_nr; i++) { + + rank = rank_list->rl_ranks[i]; + DBG_PRINT("Sending shutdown to rank=%d\n", rank); + + server_ep.ep_rank = rank; + server_ep.ep_tag = 0; + server_ep.ep_grp = grp; + + rc = crt_req_create(crt_ctx, &server_ep, RPC_SHUTDOWN, &rpc); + if (rc != 0) { + D_ERROR("crt_req_create() failed; rc=%d\n", rc); + assert(0); + } + + rc = crt_req_send(rpc, rpc_handle_reply, &sem); + crtu_sem_timedwait(&sem, 10, __LINE__); + DBG_PRINT("RPC response received from rank=%d\n", rank); + } + } + + D_FREE(rank_list->rl_ranks); + D_FREE(rank_list); + + rc = crt_group_view_destroy(grp); + if (rc != 0) { + 
D_ERROR("crt_group_view_destroy() failed; rc=%d\n", rc); + assert(0); + } + + g_do_shutdown = true; + pthread_join(progress_thread, NULL); + + sem_destroy(&sem); + + rc = crt_finalize(); + if (rc != 0) { + D_ERROR("crt_finalize() failed with rc=%d\n", rc); + assert(0); + } + + DBG_PRINT("Client successfully finished\n"); + d_log_fini(); + + return 0; +} diff --git a/src/tests/ftest/cart/dual_provider_common.h b/src/tests/ftest/cart/dual_provider_common.h new file mode 100644 index 00000000000..54b0afdb338 --- /dev/null +++ b/src/tests/ftest/cart/dual_provider_common.h @@ -0,0 +1,188 @@ +/* + * (C) Copyright 2019-2022 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#ifndef __DUAL_PROVIDER_COMMON_H__ +#define __DUAL_PROVIDER_COMMON_H__ +static int do_shutdown; +static int g_my_rank; + + +static void +exit_on_line(int line) +{ + printf("Failed on line %d\n", line); + exit(0); +} + +#define error_exit() exit_on_line(__LINE__) + + +#define MY_BASE 0x010000000 +#define MY_VER 0 + +#define NUM_PRIMARY_CTX_MAX 8 +#define NUM_SECONDARY_CTX_MAX 8 + +#define SERVER_GROUP_NAME "dual_provider_group" + +#define RPC_DECLARE(name) \ + CRT_RPC_DECLARE(name, CRT_ISEQ_##name, CRT_OSEQ_##name) \ + CRT_RPC_DEFINE(name, CRT_ISEQ_##name, CRT_OSEQ_##name) + +enum { + RPC_PING = CRT_PROTO_OPC(MY_BASE, MY_VER, 0), + RPC_SHUTDOWN +} rpc_id_t; + +#define CRT_ISEQ_RPC_PING /* input fields */ \ + ((crt_bulk_t) (bulk_hdl1) CRT_VAR) \ + ((crt_bulk_t) (bulk_hdl2) CRT_VAR) \ + ((uint32_t) (size1) CRT_VAR) \ + ((uint32_t) (size2) CRT_VAR) + +#define CRT_OSEQ_RPC_PING /* output fields */ \ + ((crt_bulk_t) (ret_bulk) CRT_VAR) \ + ((int32_t) (rc) CRT_VAR) + +#define CRT_ISEQ_RPC_SHUTDOWN /* input fields */ \ + ((uint32_t) (field) CRT_VAR) + +#define CRT_OSEQ_RPC_SHUTDOWN /* output fields */ \ + ((uint32_t) (field) CRT_VAR) + + +static int handler_ping(crt_rpc_t *rpc); +static int handler_shutdown(crt_rpc_t *rpc); + +RPC_DECLARE(RPC_PING); +RPC_DECLARE(RPC_SHUTDOWN); + 
+struct crt_proto_rpc_format my_proto_rpc_fmt[] = { + { + .prf_flags = 0, + .prf_req_fmt = &CQF_RPC_PING, + .prf_hdlr = (void *)handler_ping, + .prf_co_ops = NULL, + }, { + .prf_flags = 0, + .prf_req_fmt = &CQF_RPC_SHUTDOWN, + .prf_hdlr = (void *)handler_shutdown, + .prf_co_ops = NULL, + } +}; + +struct crt_proto_format my_proto_fmt = { + .cpf_name = "my-proto", + .cpf_ver = MY_VER, + .cpf_count = ARRAY_SIZE(my_proto_rpc_fmt), + .cpf_prf = &my_proto_rpc_fmt[0], + .cpf_base = MY_BASE, +}; + +static int +bulk_transfer_done_cb(const struct crt_bulk_cb_info *info) +{ + void *buff; + int rc; + + if (info->bci_rc != 0) { + DBG_PRINT("Bulk transfer failed with rc=%d\n", info->bci_rc); + error_exit(); + } + + DBG_PRINT("Bulk transfer done\n"); + + rc = crt_reply_send(info->bci_bulk_desc->bd_rpc); + if (rc != 0) { + D_ERROR("Failed to send response\n"); + error_exit(); + } + + crt_bulk_free(info->bci_bulk_desc->bd_local_hdl); + + buff = info->bci_arg; + D_FREE(buff); + + RPC_PUB_DECREF(info->bci_bulk_desc->bd_rpc); + + return 0; +} + +static int +handler_ping(crt_rpc_t *rpc) +{ + struct RPC_PING_in *input; + struct RPC_PING_out *output; + crt_context_t *ctx; + int rc = 0; + + input = crt_req_get(rpc); + output = crt_reply_get(rpc); + + output->rc = 0; + DBG_PRINT("Sizes: %d %d\n", input->size1, input->size2); + + ctx = rpc->cr_ctx; + + DBG_PRINT("RPC arived on a %s context\n", + crt_context_is_primary(ctx) ? 
"primary" : "secondary"); + + /* TODO: Change this to rank == 2 when bulk support is added */ + if (g_my_rank == 100002) { + struct crt_bulk_desc bulk_desc; + crt_bulk_t dst_bulk; + char *dst; + d_sg_list_t sgl; + + DBG_PRINT("Initiating transfer\n"); + + D_ALLOC_ARRAY(dst, input->size2); + + rc = d_sgl_init(&sgl, 1); + if (rc != 0) + error_exit(); + + sgl.sg_iovs[0].iov_buf = dst; + sgl.sg_iovs[0].iov_buf_len = input->size2; + sgl.sg_iovs[0].iov_len = input->size2; + + rc = crt_bulk_create(rpc->cr_ctx, &sgl, CRT_BULK_RW, &dst_bulk); + if (rc != 0) + error_exit(); + + RPC_PUB_ADDREF(rpc); + bulk_desc.bd_rpc = rpc; + bulk_desc.bd_bulk_op = CRT_BULK_GET; + bulk_desc.bd_remote_hdl = input->bulk_hdl2; + bulk_desc.bd_remote_off = 0; + bulk_desc.bd_local_hdl = dst_bulk; + bulk_desc.bd_local_off = 0; + bulk_desc.bd_len = input->size2; + rc = crt_bulk_bind_transfer(&bulk_desc, bulk_transfer_done_cb, + dst, NULL); + if (rc != 0) { + D_ERROR("transfer failed; rc=%d\n", rc); + error_exit(); + } + } + + rc = crt_reply_send(rpc); + if (rc) + D_ERROR("Failed with rc=%d\n", rc); + + return 0; +} + + +static int +handler_shutdown(crt_rpc_t *rpc) +{ + crt_reply_send(rpc); + + do_shutdown = 1; + return 0; +} + +#endif /* __DUAL_PROVIDER_COMMON_H__ */ diff --git a/src/tests/ftest/cart/dual_provider_server.c b/src/tests/ftest/cart/dual_provider_server.c new file mode 100644 index 00000000000..b9b93ee0c8c --- /dev/null +++ b/src/tests/ftest/cart/dual_provider_server.c @@ -0,0 +1,393 @@ +/* + * (C) Copyright 2018-2022 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * Dual-provider server + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "crt_utils.h" + +#include "dual_provider_common.h" + + + +static void +print_usage(const char *msg) +{ + printf("Error: %s\n", msg); + printf("Usage: ./dual_provider_server -i 'iface0,iface1' "); + printf("-d 'domain0,domain1' -p 'provider0,provider1' [-c 'num1,num2'] "); + printf("[-f 'file_to_transfer']\n"); + printf("\nLaunches server in dual provider mode based on provided args\n"); + printf("NOTE: Same argument values can be specified for both "); + printf("servers, e.g. -i 'eth0,eth0'\n"); + printf("\nArguments:\n"); + printf("-i 'iface0,iface1' : Specify two network interfaces to use; "); + printf("e.g. 'eth0,eth1'\n"); + printf("-d 'domain0,domain1': Specify two domains to use; "); + printf("e.g. 'eth0,eth1'\n"); + printf("-p 'provider0,provider1' : Specify providers to use; "); + printf("e.g. 
'ofi+tcp,ofi+verbs'\n"); + printf("-c 'num1,num2' : Specify number of contexts to allocate on each\n"); + printf("NOTE: first provider will be considered a primary one\n"); + printf("-f [filename] : If set will transfer contents "); + printf("of the specified file via bulk/rdma as part of 'PING' rpc\n"); +} + +void * +progress_fn(void *data) +{ + crt_context_t *p_ctx = (crt_context_t *)data; + int rc; + + while (do_shutdown == 0) + crt_progress(*p_ctx, 1000); + + sleep(1); + rc = crt_context_destroy(*p_ctx, 1); + if (rc != 0) + D_ERROR("ctx destroy failed\n"); + pthread_exit(NULL); +} + +static void +__split_arg(char *arg_to_split, char **first_arg, char **second_arg) +{ + char *save_ptr; + + if (!arg_to_split) + return; + + if (first_arg == NULL || second_arg == NULL) { + print_usage("Fatal error, arguments NULL\n"); + exit(-1); + } + + *first_arg = strtok_r(arg_to_split, ",", &save_ptr); + if (*first_arg == NULL) { + print_usage("Failed to parse first arg"); + exit(-1); + } + + *second_arg = strtok_r(NULL, ",", &save_ptr); /* NULL when no second token */ + if (*second_arg == NULL) { + print_usage("Failed to parse second arg"); + exit(-1); + } +} + + +int main(int argc, char **argv) +{ + int num_primary_ctx; + int num_secondary_ctx; + crt_context_t primary_ctx[NUM_PRIMARY_CTX_MAX]; + pthread_t primary_progress_thread[NUM_PRIMARY_CTX_MAX]; + + crt_context_t secondary_ctx[NUM_SECONDARY_CTX_MAX]; + pthread_t secondary_progress_thread[NUM_SECONDARY_CTX_MAX]; + + int rc; + int i; + int c; /* getopt() returns int */ + char *arg_interface = NULL; + char *arg_domain = NULL; + char *arg_provider = NULL; + char *arg_mmap_file = NULL; + char *arg_num_ctx = NULL; + char default_iface0[] = "ib0"; + char default_iface1[] = "ib1"; + char default_domain0[] = "mlx5_0"; + char default_domain1[] = "mlx5_1"; + char default_provider0[] = "ofi+verbs;ofi_rxm\0"; + char default_provider1[] = "ofi+tcp;ofi+rxm\0"; + + char *iface0, *iface1; + char *domain0, *domain1; + char *provider0, *provider1; + 
char *num_ctx0, *num_ctx1; + char *env_self_rank; + char 
*env_group_cfg; + char *my_uri; + uint32_t grp_size; + crt_group_t *grp; + char *uri; + char *saved_provider; + char *saved_iface; + char *saved_domain; + + + /* Get self rank and a group config file from envs set by crt_launch */ + env_self_rank = getenv("CRT_L_RANK"); + env_group_cfg = getenv("CRT_L_GRP_CFG"); + + if (env_self_rank == NULL || env_group_cfg == NULL) { + printf("Error: This application is intended to be launched via crt_launch\n"); + return 0; + } + + g_my_rank = atoi(env_self_rank); + crtu_test_init(g_my_rank, 20, true, true); + + iface0 = default_iface0; + iface1 = default_iface1; + domain0 = default_domain0; + domain1 = default_domain1; + provider0 = default_provider0; + provider1 = default_provider1; + + num_primary_ctx = NUM_PRIMARY_CTX_MAX; + num_secondary_ctx = NUM_SECONDARY_CTX_MAX; + + while ((c = getopt(argc, argv, "i:p:d:f:c:")) != -1) { + switch (c) { + case 'i': + arg_interface = optarg; + break; + case 'd': + arg_domain = optarg; + break; + case 'p': + arg_provider = optarg; + break; + case 'f': + arg_mmap_file = optarg; + break; + case 'c': + arg_num_ctx = optarg; + break; + default: + print_usage("invalid argument\n"); + return -1; + } + } + + saved_provider = arg_provider ? strdup(arg_provider) : NULL; /* args are optional */ + saved_domain = arg_domain ? strdup(arg_domain) : NULL; + saved_iface = arg_interface ? 
strdup(arg_interface) : NULL; + + __split_arg(arg_interface, &iface0, &iface1); + __split_arg(arg_domain, &domain0, &domain1); + __split_arg(arg_provider, &provider0, &provider1); + + if (arg_num_ctx) { + __split_arg(arg_num_ctx, &num_ctx0, &num_ctx1); + num_primary_ctx = atoi(num_ctx0); + num_secondary_ctx = atoi(num_ctx1); + } + + if (num_primary_ctx > NUM_PRIMARY_CTX_MAX) { + printf("Error: Exceeded max alllowed %d for primary ctx\n", + NUM_PRIMARY_CTX_MAX); + return -1; + } + + if (num_secondary_ctx > NUM_SECONDARY_CTX_MAX) { + printf("Error: Exceeded max alllowed %d for secondary ctx\n", + NUM_SECONDARY_CTX_MAX); + return -1; + } + + printf("----------------------------------------\n"); + printf("My_rank: 
%d\n", g_my_rank); + printf("Provider0: '%s' Interface0: '%s' Domain0: '%s' #ctx: %d\n", + provider0, iface0, domain0, num_primary_ctx); + printf("Provider1: '%s' Interface1: '%s' Domain1: '%s' #ctx: %d\n", + provider1, iface1, domain1, num_secondary_ctx); + printf("File to transfer: '%s'\n", + arg_mmap_file ? arg_mmap_file : "none"); + printf("----------------------------------------\n\n"); + + /* Done with parsing, now start the server up */ + rc = d_log_init(); + if (rc != 0) { + D_ERROR("d_log_init() failed; rc=%d\n", rc); + error_exit(); + } + + crt_init_options_t init_opts = {0}; + + init_opts.cio_provider = saved_provider; + init_opts.cio_interface = saved_iface; + init_opts.cio_domain = saved_domain; + + rc = crt_init_opt(SERVER_GROUP_NAME, + CRT_FLAG_BIT_SERVER | CRT_FLAG_BIT_AUTO_SWIM_DISABLE, + &init_opts); + if (rc != 0) { + D_ERROR("crt_init() failed; rc=%d\n", rc); + error_exit(); + } + + for (i = 0; i < num_primary_ctx; i++) { + rc = crt_context_create(&primary_ctx[i]); + if (rc != 0) { + D_ERROR("Context %d creation failed; rc=%d\n", i, rc); + error_exit(); + } + + rc = crt_context_uri_get(primary_ctx[i], &uri); + if (rc != 0) { + D_ERROR("crt_context_uri_get(%d) failed; rc=%d\n", i, rc); + error_exit(); + } + printf("Primary context[%d] uri=%s\n", i, uri); + + rc = pthread_create(&primary_progress_thread[i], 0, progress_fn, &primary_ctx[i]); + if (rc != 0) + error_exit(); + } + + for (i = 0; i < num_secondary_ctx; i++) { + rc = crt_context_create_secondary(&secondary_ctx[i], 0); + if (rc != 0) { + D_ERROR("Context %d creation failed; rc=%d\n", i, rc); + error_exit(); + } + + rc = crt_context_uri_get(secondary_ctx[i], &uri); + if (rc != 0) { + D_ERROR("crt_context_uri_get(%d) failed; rc=%d\n", i, rc); + error_exit(); + } + printf("Secondary context[%d] uri=%s\n", i, uri); + + rc = pthread_create(&secondary_progress_thread[i], 0, + progress_fn, &secondary_ctx[i]); + if (rc != 0) { + error_exit(); + } + } + + rc = crt_proto_register(&my_proto_fmt); + 
if (rc != 0) { + D_ERROR("crt_proto_register() failed; rc=%d\n", rc); + error_exit(); + } + + grp = crt_group_lookup(NULL); + if (!grp) + error_exit(); + + rc = crt_rank_self_set(g_my_rank); + if (rc != 0) + error_exit(); + + { + FILE *f; + int parsed_rank; + char parsed_addr[256]; + + f = fopen(env_group_cfg, "r"); + if (!f) { + D_ERROR("Failed to open %s\n", env_group_cfg); + error_exit(); + } + + while (1) { + rc = fscanf(f, "%8d %254s", &parsed_rank, parsed_addr); + if (rc == EOF) { + rc = 0; + break; + } + + if (parsed_rank == g_my_rank) + continue; + + + DBG_PRINT("Rank=%d uri='%s'\n", parsed_rank, parsed_addr); + rc = crt_group_primary_rank_add(primary_ctx[0], grp, + parsed_rank, parsed_addr); + + if (rc != 0) { + D_ERROR("Failed to add %d %s; rc=%d\n", + parsed_rank, parsed_addr, rc); + break; + } + } + } + + rc = crt_rank_uri_get(grp, g_my_rank, 0, &my_uri); + if (rc) + error_exit(); + + + rc = crt_group_size(NULL, &grp_size); + if (rc) + error_exit(); + + DBG_PRINT("self_rank=%d uri=%s file=%s group_size=%d\n", + g_my_rank, my_uri, env_group_cfg, grp_size); + + D_FREE(my_uri); + + + if (g_my_rank == 0) { + DBG_PRINT("Saving group config info\n"); + rc = crt_group_config_save(NULL, true); + if (rc) + error_exit(); + } + + { + FILE *f; + char *filename; + char *pri_uri0; + char *sec_uri0; + + D_ASPRINTF(filename, "/tmp/%s_rank_%d_uris.cart", SERVER_GROUP_NAME, g_my_rank); + if (filename == NULL) + error_exit(); + + f = fopen(filename, "w"); + if (f == NULL) + error_exit(); + + + rc = crt_context_uri_get(primary_ctx[0], &pri_uri0); + if (rc) + error_exit(); + + rc = crt_context_uri_get(secondary_ctx[0], &sec_uri0); + if (rc) + error_exit(); + + fprintf(f, "%s\n", pri_uri0); + fprintf(f, "%s\n", sec_uri0); + + fclose(f); + D_FREE(filename); + } + + for (i = 0; i < num_primary_ctx; i++) + pthread_join(primary_progress_thread[i], NULL); + + for (i = 0; i < num_secondary_ctx; i++) + pthread_join(secondary_progress_thread[i], NULL); + + + rc = crt_finalize(); + 
if (rc != 0) + error_exit(); + + d_log_fini(); + + free(saved_provider); + free(saved_domain); + free(saved_iface); + return 0; +} + From b9ed282a8a91e3d18a9fce21692a32983ad5225a Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 15 Jun 2022 06:47:27 +0800 Subject: [PATCH 09/28] DAOS-9928 object: RPC & bulk hanlder for secondary provider (#8974) RPC & bulk handler for secondary provider. Signed-off-by: Niu Yawei --- src/engine/srv.c | 45 ++++++++++-- src/object/srv_obj.c | 158 +++++++++++++++++++++++++++++++------------ 2 files changed, 156 insertions(+), 47 deletions(-) diff --git a/src/engine/srv.c b/src/engine/srv.c index a4c8125b909..1a531f26219 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -296,6 +296,45 @@ dss_rpc_hdlr(crt_context_t *ctx, void *hdlr_arg, return sched_req_enqueue(dx, &attr, real_rpc_hdlr, rpc); } +static int +secondary_rpc_hdlr(crt_context_t *ctx, void *hdlr_arg, void (*real_rpc_hdlr)(void *), void *arg) +{ + struct dss_xstream *primary_dx; + crt_rpc_t *rpc = (crt_rpc_t *)hdlr_arg; + uint32_t tag; + int xs_id, rc; + + D_DEBUG(DB_TRACE, "Received secondary RPC, opc: %#x\n", rpc->cr_opc); + + rc = crt_req_dst_tag_get(rpc, &tag); + if (rc) { + D_ERROR("Failed to get tag from RPC, "DF_RC"\n", DP_RC(rc)); + return rc; + } + + /* The RPC dest tag must be SYS0 or a VOS target */ + if (tag == 0) { + xs_id = 0; + } else if (tag >= DAOS_TGT0_OFFSET && tag < DAOS_IO_CTX_ID(dss_tgt_nr)) { + xs_id = DSS_MAIN_XS_ID(tag - DAOS_TGT0_OFFSET); + } else { + D_ERROR("Invalid tag:%u from secondary RPC\n", tag); + return -DER_INVAL; + } + + primary_dx = dss_get_xstream(xs_id); + if (primary_dx == NULL) { + D_ERROR("Failed to get primary xstream:%u\n", xs_id); + return -DER_INVAL; + } + + /* + * Given that the secondary RPC isn't common use case, ignore the CPU apportioning + * policy and kickoff the RPC processing on primary xstream immediately. 
+ */ + return sched_create_thread(primary_dx, real_rpc_hdlr, rpc, ABT_THREAD_ATTR_NULL, NULL, 0); +} + static void dss_nvme_poll_ult(void *args) { @@ -462,10 +501,7 @@ dss_srv_handler(void *arg) xs_type = xs_id2type(dx->dx_xs_id); if (xs_type == DSS_XS_SEC) { - /* TODO: Create secondary cart context and register secondary RPC handler */ - D_ASSERTF(0, "Secondary cart context isn't supported\n"); -#if 0 - rc = crt_context_create_seconary(&dmi->dmi_ctxt); + rc = crt_context_create_secondary(&dmi->dmi_ctx, 0); if (rc != 0) { D_ERROR("Failed to create secondary crt ctxt: "DF_RC"\n", DP_RC(rc)); goto tls_fini; @@ -489,7 +525,6 @@ dss_srv_handler(void *arg) rc = -DER_INVAL; goto crt_destroy; } -#endif } else if (dx->dx_comm) { /* create private transport context */ rc = crt_context_create(&dmi->dmi_ctx); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index b817ff063ed..6a4e855be04 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -219,8 +219,83 @@ struct obj_bulk_args { int result; bool inited; ABT_eventual eventual; + ABT_mutex lock; }; +static inline int +obj_bulk_args_fini(struct obj_bulk_args *args) +{ + int rc, *status; + + D_ASSERT(args->inited); + rc = ABT_eventual_wait(args->eventual, (void **)&status); + if (rc) + rc = dss_abterr2der(rc); + else + rc = *status; + + ABT_eventual_free(&args->eventual); + ABT_mutex_free(&args->lock); + + return rc; +} + +static inline int +obj_bulk_args_init(struct obj_bulk_args *args) +{ + int rc, *status; + + rc = ABT_eventual_create(sizeof(*status), &args->eventual); + if (rc != ABT_SUCCESS) + return dss_abterr2der(rc); + + rc = ABT_mutex_create(&args->lock); + if (rc != ABT_SUCCESS) { + ABT_eventual_free(&args->eventual); + return dss_abterr2der(rc); + } + + args->inited = true; + args->bulks_inflight = 1; + args->result = 0; + return 0; +} + +/* Get the original cart context that client intended to send to */ +static void * +rpc2orig_ctx(crt_rpc_t *rpc, bool *is_primary) +{ + /* + * TODO: + * - Use new 
Cart API to query the original provider is primary or secondary + * through RPC; + * - Use new Cart API to query if remote bulk is originally from secondary or + * primary (for forwarded bulk transfer); + */ + *is_primary = true; + return rpc->cr_ctx; +} + +static void +obj_bulk_inflights(struct obj_bulk_args *args, crt_rpc_t *rpc, int val) +{ + bool is_primary; + + D_ASSERT(val == 1 || val == -1); + rpc2orig_ctx(rpc, &is_primary); + + if (!is_primary) + ABT_mutex_lock(args->lock); + + D_ASSERT(args->bulks_inflight > 0); + args->bulks_inflight += val; + if (args->bulks_inflight == 0) + ABT_eventual_set(args->eventual, &args->result, sizeof(args->result)); + + if (!is_primary) + ABT_mutex_unlock(args->lock); +} + static int obj_bulk_comp_cb(const struct crt_bulk_cb_info *cb_info) { @@ -241,11 +316,7 @@ obj_bulk_comp_cb(const struct crt_bulk_cb_info *cb_info) if (arg->result == 0) arg->result = cb_info->bci_rc; - D_ASSERT(arg->bulks_inflight > 0); - arg->bulks_inflight--; - if (arg->bulks_inflight == 0) - ABT_eventual_set(arg->eventual, &arg->result, - sizeof(arg->result)); + obj_bulk_inflights(arg, rpc, -1); crt_req_decref(rpc); return cb_info->bci_rc; @@ -309,6 +380,26 @@ obj_bulk_bypass(d_sg_list_t *sgl, crt_bulk_op_t bulk_op) } } +/* Get the proper cart context for local bulk handle creation */ +static inline void * +rpc2bulk_ctx(crt_rpc_t *rpc, bool create) +{ + void *orig_ctx; + bool is_primary; + + /* + * - When the bulk is on primary provider, return primary cart context; + * - When the bulk is on secondary provider, return secondary cart context + * if 'create' is true, otherwise, return NULL since we don't use bulk + * cache for secondary provider now; + */ + orig_ctx = rpc2orig_ctx(rpc, &is_primary); + if (is_primary || create) + return orig_ctx; + + return NULL; +} + static int bulk_transfer_sgl(daos_handle_t ioh, crt_rpc_t *rpc, crt_bulk_t remote_bulk, off_t remote_off, crt_bulk_op_t bulk_op, bool bulk_bind, @@ -412,7 +503,7 @@ 
bulk_transfer_sgl(daos_handle_t ioh, crt_rpc_t *rpc, crt_bulk_t remote_bulk, local_off = 0; sgl_sent.sg_nr = sgl_sent.sg_nr_out = iov_idx - start; - rc = crt_bulk_create(rpc->cr_ctx, &sgl_sent, bulk_perm, + rc = crt_bulk_create(rpc2bulk_ctx(rpc, true), &sgl_sent, bulk_perm, &local_bulk); if (rc != 0) { D_ERROR("crt_bulk_create %d error "DF_RC".\n", @@ -441,7 +532,7 @@ bulk_transfer_sgl(daos_handle_t ioh, crt_rpc_t *rpc, crt_bulk_t remote_bulk, bulk_desc.bd_remote_off = remote_off; bulk_desc.bd_local_off = local_off; - p_arg->bulks_inflight++; + obj_bulk_inflights(p_arg, rpc, 1); if (bulk_bind) rc = crt_bulk_bind_transfer(&bulk_desc, cached_bulk ? cached_bulk_cp : bulk_cp, p_arg, @@ -453,7 +544,7 @@ bulk_transfer_sgl(daos_handle_t ioh, crt_rpc_t *rpc, crt_bulk_t remote_bulk, if (rc < 0) { D_ERROR("crt_bulk_transfer %d error "DF_RC".\n", sgl_idx, DP_RC(rc)); - p_arg->bulks_inflight--; + obj_bulk_inflights(p_arg, rpc, -1); if (!cached_bulk) crt_bulk_free(local_bulk); crt_req_decref(rpc); @@ -472,7 +563,7 @@ obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, struct obj_bulk_args *p_arg, struct ds_cont_hdl *coh) { struct obj_bulk_args arg = { 0 }; - int i, rc, *status, ret; + int i, rc; bool async = true; if (remote_bulks == NULL) { @@ -485,17 +576,14 @@ obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, async = false; } - rc = ABT_eventual_create(sizeof(*status), &p_arg->eventual); - if (rc != 0) - return dss_abterr2der(rc); + rc = obj_bulk_args_init(p_arg); + if (rc) + return rc; - p_arg->inited = true; D_DEBUG(DB_IO, "bulk_op %d sgl_nr %d\n", bulk_op, sgl_nr); - p_arg->bulks_inflight++; - if (daos_handle_is_valid(ioh)) { - rc = vos_dedup_verify_init(ioh, rpc->cr_ctx, CRT_BULK_RW); + rc = vos_dedup_verify_init(ioh, rpc2bulk_ctx(rpc, false), CRT_BULK_RW); if (rc) { D_ERROR("Dedup verify prep failed. 
"DF_RC"\n", DP_RC(rc)); @@ -533,17 +621,14 @@ obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, break; } done: - if (--(p_arg->bulks_inflight) == 0) - ABT_eventual_set(p_arg->eventual, &rc, sizeof(rc)); + if (rc) + p_arg->result = rc; + obj_bulk_inflights(p_arg, rpc, -1); if (async) return rc; - ret = ABT_eventual_wait(p_arg->eventual, (void **)&status); - if (rc == 0) - rc = ret ? dss_abterr2der(ret) : *status; - - ABT_eventual_free(&p_arg->eventual); + rc = obj_bulk_args_fini(p_arg); if (rc == 0 && coh != NULL && unlikely(coh->sch_closed)) { D_ERROR("Cont hdl "DF_UUID" is closed/evicted unexpectedly\n", @@ -1501,7 +1586,7 @@ obj_local_rw_internal(crt_rpc_t *rpc, struct obj_io_context *ioc, goto out; biod = vos_ioh2desc(ioh); - rc = bio_iod_prep(biod, BIO_CHK_TYPE_IO, rma ? rpc->cr_ctx : NULL, + rc = bio_iod_prep(biod, BIO_CHK_TYPE_IO, rma ? rpc2bulk_ctx(rpc, false) : NULL, CRT_BULK_RW); if (rc) { D_ERROR(DF_UOID" bio_iod_prep failed: "DF_RC".\n", @@ -2053,7 +2138,7 @@ ds_obj_ec_rep_handler(crt_rpc_t *rpc) goto out; } biod = vos_ioh2desc(ioh); - rc = bio_iod_prep(biod, BIO_CHK_TYPE_IO, rpc->cr_ctx, CRT_BULK_RW); + rc = bio_iod_prep(biod, BIO_CHK_TYPE_IO, rpc2bulk_ctx(rpc, false), CRT_BULK_RW); if (rc) { D_ERROR(DF_UOID" bio_iod_prep failed: "DF_RC".\n", DP_UOID(oer->er_oid), DP_RC(rc)); @@ -2133,8 +2218,7 @@ ds_obj_ec_agg_handler(crt_rpc_t *rpc) goto out; } biod = vos_ioh2desc(ioh); - rc = bio_iod_prep(biod, BIO_CHK_TYPE_IO, rpc->cr_ctx, - CRT_BULK_RW); + rc = bio_iod_prep(biod, BIO_CHK_TYPE_IO, rpc2bulk_ctx(rpc, false), CRT_BULK_RW); if (rc) { D_ERROR(DF_UOID" bio_iod_prep failed: "DF_RC".\n", DP_UOID(oea->ea_oid), DP_RC(rc)); @@ -3958,8 +4042,8 @@ ds_cpd_handle_one(crt_rpc_t *rpc, struct daos_cpd_sub_head *dcsh, biods[i] = vos_ioh2desc(iohs[i]); rc = bio_iod_prep(biods[i], BIO_CHK_TYPE_IO, - dcu->dcu_flags & ORF_CPD_BULK ? - rpc->cr_ctx : NULL, CRT_BULK_RW); + dcu->dcu_flags & ORF_CPD_BULK ? 
rpc2bulk_ctx(rpc, false) : NULL, + CRT_BULK_RW); if (rc != 0) { D_ERROR("bio_iod_prep failed for obj "DF_UOID ", DTX "DF_DTI": "DF_RC"\n", @@ -4010,18 +4094,10 @@ ds_cpd_handle_one(crt_rpc_t *rpc, struct daos_cpd_sub_head *dcsh, /* P3: bulk data transafer. */ for (i = 0; i < dcde->dcde_write_cnt && rma_idx < rma; i++) { - int *status; - if (!bulks[i].inited) continue; - rc = ABT_eventual_wait(bulks[i].eventual, (void **)&status); - if (rc != 0) - rc = dss_abterr2der(rc); - if (rc == 0 && *status != 0) - rc = *status; - - ABT_eventual_free(&bulks[i].eventual); + rc = obj_bulk_args_fini(&bulks[i]); bio_iod_flush(biods[i]); rma_idx++; @@ -4143,13 +4219,11 @@ ds_cpd_handle_one(crt_rpc_t *rpc, struct daos_cpd_sub_head *dcsh, out: if (rc != 0) { if (bulks != NULL) { - for (i = 0; - i < dcde->dcde_write_cnt && rma_idx < rma; i++) { + for (i = 0; i < dcde->dcde_write_cnt && rma_idx < rma; i++) { if (!bulks[i].inited) continue; - ABT_eventual_wait(bulks[i].eventual, NULL); - ABT_eventual_free(&bulks[i].eventual); + obj_bulk_args_fini(&bulks[i]); rma_idx++; } } From e9e998f71bf0910ed2a96e8add9eead13b74cd7a Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Tue, 21 Jun 2022 09:40:33 -0600 Subject: [PATCH 10/28] DAOS-10876: Fix merges from master (#9411) There were some recent network-related control plane changes in master that need to be updated to support a multiprovider config. 
Features: control Signed-off-by: Kris Jacque --- src/control/server/server.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/control/server/server.go b/src/control/server/server.go index 8c351c77fea..41375ef2065 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -40,7 +40,17 @@ import ( func genFiAffFn(fis *hardware.FabricInterfaceSet) config.EngineAffinityFn { return func(l logging.Logger, e *engine.Config) (uint, error) { - fi, err := fis.GetInterfaceOnNetDevice(e.Fabric.Interface, e.Fabric.Provider) + iface, err := e.Fabric.GetPrimaryInterface() + if err != nil { + return 0, err + } + + prov, err := e.Fabric.GetPrimaryProvider() + if err != nil { + return 0, err + } + + fi, err := fis.GetInterfaceOnNetDevice(iface, prov) if err != nil { return 0, err } @@ -505,7 +515,11 @@ func (srv *server) start(ctx context.Context, shutdown context.CancelFunc) error func waitFabricReady(ctx context.Context, log logging.Logger, cfg *config.Server) error { ifaces := make([]string, 0, len(cfg.Engines)) for _, eng := range cfg.Engines { - ifaces = append(ifaces, eng.Fabric.Interface) + engIfaces, err := eng.Fabric.GetInterfaces() + if err != nil { + return err + } + ifaces = append(ifaces, engIfaces...) } // Skip wait if no fabric interfaces specified in config. 
From 9483a33661bd23f19cf70e129de7d48ca1b00bc7 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Wed, 22 Jun 2022 17:33:01 -0700 Subject: [PATCH 11/28] CART-89 multiprovider: Add new api to query original src provider (#9349) * New api to query original src provider Signed-off-by: Alexander A Oganezov --- src/cart/crt_hg_proc.c | 4 ++- src/cart/crt_init.c | 14 +++++++++- src/cart/crt_internal_types.h | 4 ++- src/cart/crt_rpc.c | 30 +++++++++++++++++++++ src/cart/crt_rpc.h | 9 ++++++- src/include/cart/api.h | 12 +++++++++ src/tests/ftest/cart/dual_provider_client.c | 5 ++++ src/tests/ftest/cart/dual_provider_common.h | 15 +++++++++-- 8 files changed, 87 insertions(+), 6 deletions(-) diff --git a/src/cart/crt_hg_proc.c b/src/cart/crt_hg_proc.c index 86c1aebdd58..5cb681ad2e2 100644 --- a/src/cart/crt_hg_proc.c +++ b/src/cart/crt_hg_proc.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2021 Intel Corporation. + * (C) Copyright 2016-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -524,6 +524,8 @@ crt_proc_in_common(crt_proc_t proc, crt_rpc_input_t *data) ); hdr->cch_dst_tag = rpc_priv->crp_pub.cr_ep.ep_tag; + hdr->cch_src_is_primary = rpc_priv->crp_src_is_primary; + if (crt_is_service()) { hdr->cch_src_rank = crt_grp_priv_get_primary_rank( diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 5dd0fe28a6c..adc87feb503 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -30,7 +30,8 @@ dump_envariables(void) "OFI_PORT", "OFI_INTERFACE", "OFI_DOMAIN", "CRT_CREDIT_EP_CTX", "CRT_CTX_SHARE_ADDR", "CRT_CTX_NUM", "D_FI_CONFIG", "FI_UNIVERSE_SIZE", "CRT_ENABLE_MEM_PIN", - "FI_OFI_RXM_USE_SRX", "D_LOG_FLUSH", "CRT_MRC_ENABLE" }; + "FI_OFI_RXM_USE_SRX", "D_LOG_FLUSH", "CRT_MRC_ENABLE", + "CRT_SECONDARY_PROVIDER"}; D_INFO("-- ENVARS: --\n"); for (i = 0; i < ARRAY_SIZE(envars); i++) { @@ -161,6 +162,7 @@ static int data_init(int server, crt_init_options_t *opt) uint32_t mem_pin_enable = 0; uint32_t mrc_enable = 0; uint64_t start_rpcid; 
+ uint32_t is_secondary; int rc = 0; D_DEBUG(DB_ALL, "initializing crt_gdata...\n"); @@ -190,13 +192,23 @@ static int data_init(int server, crt_init_options_t *opt) D_DEBUG(DB_ALL, "Starting RPCID %#lx\n", start_rpcid); + is_secondary = 0; /* Apply CART-890 workaround for server side only */ if (server) { d_getenv_int("CRT_ENABLE_MEM_PIN", &mem_pin_enable); if (mem_pin_enable == 1) mem_pin_workaround(); + } else { + /* + * Client-side envariable to indicate that the cluster + * is running using a secondary provider + */ + d_getenv_int("CRT_SECONDARY_PROVIDER", &is_secondary); + } + crt_gdata.cg_provider_is_primary = (is_secondary) ? 0 : 1; + timeout = 0; if (opt && opt->cio_crt_timeout != 0) diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 742f578adaa..36579e863c2 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -98,7 +98,9 @@ struct crt_gdata { /** whether it is a client or server */ cg_server : 1, /** whether scalable endpoint is enabled */ - cg_use_sensors : 1; + cg_use_sensors : 1, + /** whether we are on a primary provider */ + cg_provider_is_primary : 1; ATOMIC uint64_t cg_rpcid; /* rpc id */ diff --git a/src/cart/crt_rpc.c b/src/cart/crt_rpc.c index b2d90570a27..c044f7b77dd 100644 --- a/src/cart/crt_rpc.c +++ b/src/cart/crt_rpc.c @@ -1607,6 +1607,14 @@ crt_rpc_priv_init(struct crt_rpc_priv *rpc_priv, crt_context_t crt_ctx, rpc_priv->crp_hdl_reuse = NULL; rpc_priv->crp_srv = srv_flag; rpc_priv->crp_ul_retry = 0; + + + if (srv_flag) { + rpc_priv->crp_src_is_primary = ctx->cc_primary; + } else { + rpc_priv->crp_src_is_primary = crt_gdata.cg_provider_is_primary; + } + /** * initialized to 1, so user can call crt_req_decref to destroy new req */ @@ -1811,6 +1819,28 @@ crt_req_src_rank_get(crt_rpc_t *rpc, d_rank_t *rank) return rc; } +int +crt_req_src_provider_is_primary(crt_rpc_t *req, bool *result) +{ + struct crt_rpc_priv *rpc_priv = NULL; + int rc = 0; + + if (req == NULL) { + D_ERROR("req is 
NULL\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + if (result == NULL) { + D_ERROR("result is NULL\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + rpc_priv = container_of(req, struct crt_rpc_priv, crp_pub); + *result = rpc_priv->crp_req_hdr.cch_src_is_primary; +out: + return rc; +} + int crt_req_dst_rank_get(crt_rpc_t *rpc, d_rank_t *rank) { diff --git a/src/cart/crt_rpc.h b/src/cart/crt_rpc.h index f2b313fb688..6b75d887b3c 100644 --- a/src/cart/crt_rpc.h +++ b/src/cart/crt_rpc.h @@ -67,6 +67,10 @@ struct crt_common_hdr { d_rank_t cch_src_rank; /* tag to which rpc request was sent to */ uint32_t cch_dst_tag; + /* flags */ + /* indicates whether rpc originator intended to send on a primary ctx */ + uint32_t cch_src_is_primary : 1; + /* used in crp_reply_hdr to propagate rpc failure back to sender */ uint32_t cch_rc; }; @@ -176,7 +180,10 @@ struct crt_rpc_priv { /* 1 if RPC fails HLC epsilon check */ crp_fail_hlc:1, /* RPC completed flag */ - crp_completed:1; + crp_completed:1, + /* RPC originated from a primary provider */ + crp_src_is_primary:1; + uint32_t crp_refcount; struct crt_opc_info *crp_opc_info; /* corpc info, only valid when (crp_coll == 1) */ diff --git a/src/include/cart/api.h b/src/include/cart/api.h index 9814c49ceb8..17f4a3f65d7 100644 --- a/src/include/cart/api.h +++ b/src/include/cart/api.h @@ -428,6 +428,18 @@ crt_req_get(crt_rpc_t *rpc) int crt_req_src_rank_get(crt_rpc_t *req, d_rank_t *rank); +/** + * Return whether originator runs from a primary provider or not + * + * \param[in] req Pointer to RPC request + * \param[out] result Returned result + * + * \return DER_SUCCESS on success or error + * on failure + */ +int +crt_req_src_provider_is_primary(crt_rpc_t *req, bool *result); + /** * Return destination rank * diff --git a/src/tests/ftest/cart/dual_provider_client.c b/src/tests/ftest/cart/dual_provider_client.c index c73a5375575..86749844cc3 100644 --- a/src/tests/ftest/cart/dual_provider_client.c +++ b/src/tests/ftest/cart/dual_provider_client.c 
@@ -96,6 +96,11 @@ int main(int argc, char **argv) } } + if (use_primary) + unsetenv("CRT_SECONDARY_PROVIDER"); + else + setenv("CRT_SECONDARY_PROVIDER", "1", 1); + rc = d_log_init(); assert(rc == 0); /* rank, num_attach_retries, is_server, assert_on_error */ diff --git a/src/tests/ftest/cart/dual_provider_common.h b/src/tests/ftest/cart/dual_provider_common.h index 54b0afdb338..b5b2c5004b9 100644 --- a/src/tests/ftest/cart/dual_provider_common.h +++ b/src/tests/ftest/cart/dual_provider_common.h @@ -117,6 +117,8 @@ handler_ping(crt_rpc_t *rpc) struct RPC_PING_out *output; crt_context_t *ctx; int rc = 0; + bool primary_origin = false; + input = crt_req_get(rpc); output = crt_reply_get(rpc); @@ -126,8 +128,17 @@ handler_ping(crt_rpc_t *rpc) ctx = rpc->cr_ctx; - DBG_PRINT("RPC arived on a %s context\n", - crt_context_is_primary(ctx) ? "primary" : "secondary"); + + rc = crt_req_src_provider_is_primary(rpc, &primary_origin); + + if (rc != 0) { + D_ERROR("crt_req_src_provider_is_primary() failed. rc=%d\n", rc); + error_exit(); + } + + DBG_PRINT("RPC arived on a %s context; origin was %s\n", + crt_context_is_primary(ctx) ? "primary" : "secondary", + primary_origin ? "primary" : "secondary"); /* TODO: Change this to rank == 2 when bulk support is added */ if (g_my_rank == 100002) { From ceb116adc45c722cf10a84488354879a29320a69 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Thu, 23 Jun 2022 16:56:22 -0700 Subject: [PATCH 12/28] CART-89 multiprovider: Fix issues, add new api (#9485) * CART-89 multiprovider: Fix issues, add new api - New API added to query secondary provider uri: crt_self_uri_get_secondary() - Fixed internal bug where provider info was stored per provider type rather than per provider instance. 
This would prevent/clash in situations when the same provider is used as a primary and a secondary Signed-off-by: Alexander A Oganezov --- src/cart/crt_context.c | 72 ++++++++++++--- src/cart/crt_ctl.c | 6 +- src/cart/crt_group.c | 9 +- src/cart/crt_hg.c | 97 +++++++++++++-------- src/cart/crt_hg.h | 19 ++-- src/cart/crt_init.c | 69 +++++++++------ src/cart/crt_internal_types.h | 8 +- src/cart/crt_swim.c | 2 +- src/include/cart/api.h | 16 ++++ src/tests/ftest/cart/dual_provider_server.c | 10 +++ 10 files changed, 208 insertions(+), 100 deletions(-) diff --git a/src/cart/crt_context.c b/src/cart/crt_context.c index 8a1052dac9e..7c7f9d54c98 100644 --- a/src/cart/crt_context.c +++ b/src/cart/crt_context.c @@ -223,9 +223,9 @@ crt_context_provider_create(crt_context_t *crt_ctx, crt_provider_t provider, boo D_GOTO(out, rc = -DER_INVAL); } - sep_mode = crt_provider_is_sep(provider); - cur_ctx_num = crt_provider_get_cur_ctx_num(provider); - max_ctx_num = crt_provider_get_max_ctx_num(provider); + sep_mode = crt_provider_is_sep(primary, provider); + cur_ctx_num = crt_provider_get_cur_ctx_num(primary, provider); + max_ctx_num = crt_provider_get_max_ctx_num(primary, provider); if (sep_mode && cur_ctx_num >= max_ctx_num) { @@ -248,7 +248,7 @@ crt_context_provider_create(crt_context_t *crt_ctx, crt_provider_t provider, boo ctx->cc_primary = primary; D_RWLOCK_WRLOCK(&crt_gdata.cg_rwlock); - rc = crt_hg_ctx_init(&ctx->cc_hg_ctx, provider, cur_ctx_num); + rc = crt_hg_ctx_init(&ctx->cc_hg_ctx, provider, cur_ctx_num, primary); if (rc != 0) { D_ERROR("crt_hg_ctx_init() failed, " DF_RC "\n", DP_RC(rc)); @@ -268,10 +268,10 @@ crt_context_provider_create(crt_context_t *crt_ctx, crt_provider_t provider, boo ctx->cc_idx = cur_ctx_num; - ctx_list = crt_provider_get_ctx_list(provider); + ctx_list = crt_provider_get_ctx_list(primary, provider); d_list_add_tail(&ctx->cc_link, ctx_list); - crt_provider_inc_cur_ctx_num(provider); + crt_provider_inc_cur_ctx_num(primary, provider); 
D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); @@ -654,7 +654,7 @@ crt_context_destroy(crt_context_t crt_ctx, int force) } D_RWLOCK_WRLOCK(&crt_gdata.cg_rwlock); - crt_provider_dec_cur_ctx_num(provider); + crt_provider_dec_cur_ctx_num(ctx->cc_primary, provider); d_list_del(&ctx->cc_link); D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); @@ -714,7 +714,7 @@ crt_rank_abort(d_rank_t rank) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); /* TODO: Do we need to handle secondary providers? */ - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); + ctx_list = crt_provider_get_ctx_list(true, crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { rc = 0; D_MUTEX_LOCK(&ctx->cc_mutex); @@ -1257,37 +1257,61 @@ crt_context_lookup_locked(int ctx_idx) { struct crt_context *ctx; d_list_t *ctx_list; + int i; - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); + ctx_list = crt_provider_get_ctx_list(true, crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { if (ctx->cc_idx == ctx_idx) return ctx; } + for (i = 0; i < crt_gdata.cg_num_secondary_provs; i++) { + ctx_list = crt_provider_get_ctx_list(false, crt_gdata.cg_secondary_provs[i]); + + d_list_for_each_entry(ctx, ctx_list, cc_link) { + if (ctx->cc_idx == ctx_idx) { + return ctx; + } + } + } return NULL; } -/* TODO: Need per-provider call */ crt_context_t crt_context_lookup(int ctx_idx) { struct crt_context *ctx; bool found = false; + int i; d_list_t *ctx_list; D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); + ctx_list = crt_provider_get_ctx_list(true, crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { if (ctx->cc_idx == ctx_idx) { found = true; - break; + D_GOTO(unlock, 0); + } + } + + for (i = 0; i < crt_gdata.cg_num_secondary_provs; i++) { + ctx_list = crt_provider_get_ctx_list(false, crt_gdata.cg_secondary_provs[i]); + + d_list_for_each_entry(ctx, ctx_list, cc_link) { + if (ctx->cc_idx == ctx_idx) { + 
found = true; + break; + } } } + +unlock: D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); + return (found == true) ? ctx : NULL; } @@ -1310,6 +1334,26 @@ crt_context_idx(crt_context_t crt_ctx, int *ctx_idx) return rc; } +int +crt_self_uri_get_secondary(int secondary_idx, char **uri) +{ + char *addr; + + if (secondary_idx != 0) { + D_ERROR("Only index=0 supported for now\n"); + return -DER_NONEXIST; + } + + addr = crt_gdata.cg_prov_gdata_secondary[secondary_idx].cpg_addr; + + D_STRNDUP(*uri, addr, CRT_ADDR_STR_MAX_LEN - 1); + + if (!*uri) + return -DER_NOMEM; + + return DER_SUCCESS; +} + int crt_self_uri_get(int tag, char **uri) { @@ -1344,7 +1388,7 @@ crt_context_num(int *ctx_num) return -DER_INVAL; } - *ctx_num = crt_gdata.cg_prov_gdata[crt_gdata.cg_primary_prov].cpg_ctx_num; + *ctx_num = crt_gdata.cg_prov_gdata_primary.cpg_ctx_num; return 0; } @@ -1356,7 +1400,7 @@ crt_context_empty(int provider, int locked) if (locked == 0) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - rc = d_list_empty(&crt_gdata.cg_prov_gdata[provider].cpg_ctx_list); + rc = d_list_empty(&crt_gdata.cg_prov_gdata_primary.cpg_ctx_list); if (locked == 0) D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); diff --git a/src/cart/crt_ctl.c b/src/cart/crt_ctl.c index fa6c0273558..3500bec4a91 100644 --- a/src/cart/crt_ctl.c +++ b/src/cart/crt_ctl.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2018-2021 Intel Corporation. + * (C) Copyright 2018-2022 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -204,9 +204,9 @@ crt_hdlr_ctl_ls(crt_rpc_t *rpc_req) /* TODO: Need to derive provider from rpc struct */ provider = crt_gdata.cg_primary_prov; - ctx_list = crt_provider_get_ctx_list(provider); + ctx_list = crt_provider_get_ctx_list(true, provider); - out_args->cel_ctx_num = crt_provider_get_cur_ctx_num(provider); + out_args->cel_ctx_num = crt_provider_get_cur_ctx_num(true, provider); d_list_for_each_entry(ctx, ctx_list, cc_link) { str_size = CRT_ADDR_STR_MAX_LEN; diff --git a/src/cart/crt_group.c b/src/cart/crt_group.c index 197fa83947f..125b92e0166 100644 --- a/src/cart/crt_group.c +++ b/src/cart/crt_group.c @@ -865,7 +865,7 @@ crt_grp_lc_addr_insert(struct crt_grp_priv *passed_grp_priv, D_ASSERT(crt_ctx != NULL); - if (crt_provider_is_sep(crt_ctx->cc_hg_ctx.chc_provider)) + if (crt_provider_is_sep(true, crt_ctx->cc_hg_ctx.chc_provider)) tag = 0; grp_priv = passed_grp_priv; @@ -937,7 +937,7 @@ crt_grp_lc_lookup(struct crt_grp_priv *grp_priv, int ctx_idx, provider = crt_gdata.cg_primary_prov; /* TODO: Derive from context */ - if (crt_provider_is_sep(provider)) + if (crt_provider_is_sep(true, provider)) tag = 0; default_grp_priv = grp_priv; @@ -1904,8 +1904,7 @@ crt_group_config_save(crt_group_t *grp, bool forall) rank = grp_priv->gp_self; - /* TODO: Per provider address needs to be stored in future */ - addr = crt_gdata.cg_prov_gdata[crt_gdata.cg_primary_prov].cpg_addr; + addr = crt_gdata.cg_prov_gdata_primary.cpg_addr; grpid = grp_priv->gp_pub.cg_grpid; filename = crt_grp_attach_info_filename(grp_priv); @@ -2531,7 +2530,7 @@ crt_rank_self_set(d_rank_t rank) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); + ctx_list = crt_provider_get_ctx_list(true, crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { hg_class = ctx->cc_hg_ctx.chc_hgcla; diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index c45893dfd84..131b94d6896 100644 --- 
a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -444,23 +444,36 @@ crt_hg_reg_rpcid(hg_class_t *hg_class) * be returned during multi-provider support implementation */ static struct crt_prov_gdata * -crt_get_prov_gdata(int provider) +crt_get_prov_gdata(bool primary, int provider) { - return &crt_gdata.cg_prov_gdata[provider]; + int i; + + if (primary) + return &crt_gdata.cg_prov_gdata_primary; + + for (i = 0; i < crt_gdata.cg_num_secondary_provs; i++) { + if (provider == crt_gdata.cg_secondary_provs[i]) + return &crt_gdata.cg_prov_gdata_secondary[i]; + } + + D_ASSERTF(0, "Unable to lookup provider %d on primary=%d\n", + provider, primary); + + return NULL; } static int -crt_provider_ctx0_port_get(int provider) +crt_provider_ctx0_port_get(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); return prov_data->cpg_na_config.noc_port; } static char* -crt_provider_domain_get(int provider) +crt_provider_domain_get(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); return prov_data->cpg_na_config.noc_domain; } @@ -472,9 +485,9 @@ crt_provider_name_get(int provider) } static char* -crt_provider_ip_str_get(int provider) +crt_provider_ip_str_get(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); return prov_data->cpg_na_config.noc_ip_str; } @@ -501,63 +514,71 @@ crt_provider_is_port_based(int provider) } bool -crt_provider_is_sep(int provider) +crt_provider_is_sep(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); return prov_data->cpg_sep_mode; } void -crt_provider_set_sep(int provider, bool 
enable) +crt_provider_set_sep(bool primary, int provider, bool enable) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); prov_data->cpg_sep_mode = (enable) ? 1 : 0; } int -crt_provider_get_cur_ctx_num(int provider) +crt_provider_get_cur_ctx_num(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); return prov_data->cpg_ctx_num; } int -crt_provider_get_max_ctx_num(int provider) +crt_provider_get_max_ctx_num(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); return prov_data->cpg_ctx_max_num; } +struct crt_na_config* +crt_provider_get_na_config(bool primary, int provider) +{ + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); + + return &prov_data->cpg_na_config; +} + void -crt_provider_inc_cur_ctx_num(int provider) +crt_provider_inc_cur_ctx_num(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); prov_data->cpg_ctx_num++; } void -crt_provider_dec_cur_ctx_num(int provider) +crt_provider_dec_cur_ctx_num(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); prov_data->cpg_ctx_num--; } d_list_t -*crt_provider_get_ctx_list(int provider) +*crt_provider_get_ctx_list(bool primary, int provider) { - struct crt_prov_gdata *prov_data = crt_get_prov_gdata(provider); + struct crt_prov_gdata *prov_data = crt_get_prov_gdata(primary, provider); return &(prov_data->cpg_ctx_list); } static int -crt_get_info_string(int provider, char **string, int ctx_idx) +crt_get_info_string(bool primary, int 
provider, char **string, int ctx_idx) { char *provider_str; int start_port; @@ -565,9 +586,9 @@ crt_get_info_string(int provider, char **string, int ctx_idx) char *ip_str; provider_str = crt_provider_name_get(provider); - start_port = crt_provider_ctx0_port_get(provider); - domain_str = crt_provider_domain_get(provider); - ip_str = crt_provider_ip_str_get(provider); + start_port = crt_provider_ctx0_port_get(primary, provider); + domain_str = crt_provider_domain_get(primary, provider); + ip_str = crt_provider_ip_str_get(primary, provider); if (provider == CRT_PROV_SM) { D_ASPRINTF(*string, "%s://", provider_str); @@ -675,7 +696,7 @@ crt_sep_hg_class_set(int provider, hg_class_t *class) } static int -crt_hg_class_init(int provider, int idx, hg_class_t **ret_hg_class) +crt_hg_class_init(int provider, int idx, bool primary, hg_class_t **ret_hg_class) { char *info_string = NULL; struct hg_init_info init_info = HG_INIT_INFO_INITIALIZER; @@ -685,8 +706,8 @@ crt_hg_class_init(int provider, int idx, hg_class_t **ret_hg_class) struct crt_prov_gdata *prov_data; int rc = DER_SUCCESS; - prov_data = crt_get_prov_gdata(provider); - rc = crt_get_info_string(provider, &info_string, idx); + prov_data = crt_get_prov_gdata(primary, provider); + rc = crt_get_info_string(primary, provider, &info_string, idx); if (rc != 0) D_GOTO(out, rc); @@ -695,9 +716,9 @@ crt_hg_class_init(int provider, int idx, hg_class_t **ret_hg_class) else init_info.na_init_info.progress_mode = NA_NO_BLOCK; - if (crt_provider_is_sep(provider)) + if (crt_provider_is_sep(primary, provider)) init_info.na_init_info.max_contexts = - crt_provider_get_max_ctx_num(provider); + crt_provider_get_max_ctx_num(primary, provider); else init_info.na_init_info.max_contexts = 1; @@ -724,8 +745,8 @@ crt_hg_class_init(int provider, int idx, hg_class_t **ret_hg_class) D_DEBUG(DB_NET, "New context(idx:%d), listen address: %s.\n", idx, addr_str); - /* TODO: Need to store per provider addr for multi-provider support */ - if (idx == 0) 
+ /* If address for this provider isn't filled yet*/ + if (prov_data->cpg_addr[0] == '\0') strncpy(prov_data->cpg_addr, addr_str, str_size); rc = crt_hg_reg_rpcid(hg_class); @@ -745,7 +766,7 @@ crt_hg_class_init(int provider, int idx, hg_class_t **ret_hg_class) } int -crt_hg_ctx_init(struct crt_hg_context *hg_ctx, int provider, int idx) +crt_hg_ctx_init(struct crt_hg_context *hg_ctx, int provider, int idx, bool primary) { struct crt_context *crt_ctx; hg_class_t *hg_class = NULL; @@ -758,13 +779,13 @@ crt_hg_ctx_init(struct crt_hg_context *hg_ctx, int provider, int idx) crt_ctx = container_of(hg_ctx, struct crt_context, cc_hg_ctx); hg_ctx->chc_provider = provider; - sep_mode = crt_provider_is_sep(provider); + sep_mode = crt_provider_is_sep(true, provider); /* In SEP mode all contexts share same hg_class*/ if (sep_mode) { /* Only initialize class for context0 */ if (idx == 0) { - rc = crt_hg_class_init(provider, idx, &hg_class); + rc = crt_hg_class_init(provider, idx, primary, &hg_class); if (rc != 0) D_GOTO(out, rc); @@ -773,7 +794,7 @@ crt_hg_ctx_init(struct crt_hg_context *hg_ctx, int provider, int idx) hg_class = crt_sep_hg_class_get(provider); } } else { - rc = crt_hg_class_init(provider, idx, &hg_class); + rc = crt_hg_class_init(provider, idx, primary, &hg_class); if (rc != 0) D_GOTO(out, rc); } @@ -1052,7 +1073,7 @@ crt_hg_req_create(struct crt_hg_context *hg_ctx, struct crt_rpc_priv *rpc_priv) } } - if (crt_provider_is_sep(hg_ctx->chc_provider)) { + if (crt_provider_is_sep(true, hg_ctx->chc_provider)) { hg_ret = HG_Set_target_id(rpc_priv->crp_hg_hdl, rpc_priv->crp_pub.cr_ep.ep_tag); if (hg_ret != HG_SUCCESS) { diff --git a/src/cart/crt_hg.h b/src/cart/crt_hg.h index 95655404583..a7e55fb88a8 100644 --- a/src/cart/crt_hg.h +++ b/src/cart/crt_hg.h @@ -100,7 +100,7 @@ struct crt_hg_context { /* crt_hg.c */ int crt_hg_init(void); int crt_hg_fini(void); -int crt_hg_ctx_init(struct crt_hg_context *hg_ctx, int provider, int idx); +int crt_hg_ctx_init(struct 
crt_hg_context *hg_ctx, int provider, int idx, bool primary); int crt_hg_ctx_fini(struct crt_hg_context *hg_ctx); int crt_hg_req_create(struct crt_hg_context *hg_ctx, struct crt_rpc_priv *rpc_priv); @@ -126,15 +126,16 @@ int crt_proc_out_common(crt_proc_t proc, crt_rpc_output_t *data); bool crt_provider_is_contig_ep(int provider); bool crt_provider_is_port_based(int provider); -bool crt_provider_is_sep(int provider); -void crt_provider_set_sep(int provider, bool enable); -int crt_provider_get_cur_ctx_num(int provider); -void crt_provider_inc_cur_ctx_num(int provider); -void crt_provider_dec_cur_ctx_num(int provider); char *crt_provider_name_get(int provider); - -int crt_provider_get_max_ctx_num(int provider); -d_list_t *crt_provider_get_ctx_list(int provider); +bool crt_provider_is_sep(bool primary, int provider); +void crt_provider_set_sep(bool primary, int provider, bool enable); +int crt_provider_get_cur_ctx_num(bool primary, int provider); +void crt_provider_inc_cur_ctx_num(bool primary, int provider); +void crt_provider_dec_cur_ctx_num(bool primary, int provider); +int crt_provider_get_max_ctx_num(bool primary, int provider); +d_list_t *crt_provider_get_ctx_list(bool primary, int provider); +struct crt_na_config* +crt_provider_get_na_config(bool primary, int provider); static inline int crt_hgret_2_der(int hg_ret) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index adc87feb503..0b927841051 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -53,7 +53,8 @@ dump_opt(crt_init_options_t *opt) } static int -crt_na_config_init(crt_provider_t provider, char *interface, char *domain, char *port); +crt_na_config_init(bool primary, crt_provider_t provider, + char *interface, char *domain, char *port); /* Workaround for CART-890 */ static void @@ -440,7 +441,7 @@ check_grpid(crt_group_id_t grpid) } static int -prov_settings_apply(crt_provider_t prov, crt_init_options_t *opt) +prov_settings_apply(crt_provider_t prov, bool primary, crt_init_options_t 
*opt) { char *srx_env; int rc = 0; @@ -448,10 +449,10 @@ prov_settings_apply(crt_provider_t prov, crt_init_options_t *opt) /* rxm and verbs providers only works with regular EP */ if ((prov == CRT_PROV_OFI_VERBS_RXM || prov == CRT_PROV_OFI_TCP_RXM) && - crt_provider_is_sep(prov)) { + crt_provider_is_sep(prov, primary)) { D_WARN("set CRT_CTX_SHARE_ADDR as 1 is invalid " "for current provider, ignoring it.\n"); - crt_provider_set_sep(prov, false); + crt_provider_set_sep(prov, primary, false); } if (prov == CRT_PROV_OFI_VERBS_RXM || @@ -611,44 +612,45 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) rc = __split_arg(provider_env, &provider_str0, &provider_str1); if (rc != 0) - D_GOTO(out, rc); + D_GOTO(cleanup, rc); primary_provider = crt_str_to_provider(provider_str0); secondary_provider = crt_str_to_provider(provider_str1); if (primary_provider == CRT_PROV_UNKNOWN) { D_ERROR("Requested provider %s not found\n", provider_env); - D_GOTO(out, rc = -DER_NONEXIST); + D_GOTO(cleanup, rc = -DER_NONEXIST); } rc = __split_arg(interface_env, &iface0, &iface1); if (rc != 0) - D_GOTO(out, rc); + D_GOTO(cleanup, rc); rc = __split_arg(domain_env, &domain0, &domain1); if (rc != 0) - D_GOTO(out, rc); + D_GOTO(cleanup, rc); rc = __split_arg(port_str, &port0, &port1); if (rc != 0) - D_GOTO(out, rc); + D_GOTO(cleanup, rc); if (iface0 == NULL) { D_ERROR("Empty interface specified\n"); - D_GOTO(out, rc = -DER_INVAL); + D_GOTO(cleanup, rc = -DER_INVAL); } - prov_data_init(&crt_gdata.cg_prov_gdata[primary_provider], + prov_data_init(&crt_gdata.cg_prov_gdata_primary, primary_provider, true, opt); - prov_settings_apply(primary_provider, opt); + prov_settings_apply(true, primary_provider, opt); crt_gdata.cg_primary_prov = primary_provider; - rc = crt_na_config_init(primary_provider, iface0, domain0, port0); + rc = crt_na_config_init(true, primary_provider, iface0, domain0, port0); if (rc != 0) { D_ERROR("crt_na_config_init() failed, "DF_RC"\n", DP_RC(rc)); - 
D_GOTO(out, rc); + D_GOTO(cleanup, rc); } if (secondary_provider != CRT_PROV_UNKNOWN) { num_secondaries = 1; + crt_gdata.cg_num_secondary_provs = num_secondaries; if (port1 == NULL || port1[0] == '\0') { port1 = port0; @@ -656,7 +658,11 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_ALLOC_ARRAY(crt_gdata.cg_secondary_provs, num_secondaries); if (crt_gdata.cg_secondary_provs == NULL) - D_GOTO(out, rc = -DER_NOMEM); + D_GOTO(cleanup, rc = -DER_NOMEM); + + D_ALLOC_ARRAY(crt_gdata.cg_prov_gdata_secondary, num_secondaries); + if (crt_gdata.cg_prov_gdata_secondary == NULL) + D_GOTO(cleanup, rc = -DER_NOMEM); crt_gdata.cg_secondary_provs[0] = secondary_provider; } @@ -664,17 +670,16 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) for (i = 0; i < num_secondaries; i++) { tmp_prov = crt_gdata.cg_secondary_provs[i]; - prov_data_init(&crt_gdata.cg_prov_gdata[tmp_prov], + prov_data_init(&crt_gdata.cg_prov_gdata_secondary[i], tmp_prov, false, opt); - prov_settings_apply(tmp_prov, opt); + prov_settings_apply(false, tmp_prov, opt); - rc = crt_na_config_init(tmp_prov, iface1, domain1, port1); + rc = crt_na_config_init(false, tmp_prov, iface1, domain1, port1); if (rc != 0) { D_ERROR("crt_na_config_init() failed, "DF_RC"\n", DP_RC(rc)); - D_GOTO(out, rc); + D_GOTO(cleanup, rc); } } - crt_gdata.cg_num_secondary_provs = num_secondaries; rc = crt_hg_init(); if (rc != 0) { @@ -736,7 +741,10 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) if (crt_gdata.cg_opc_map != NULL) crt_opc_map_destroy(crt_gdata.cg_opc_map); - crt_na_config_fini(crt_gdata.cg_primary_prov); + crt_na_config_fini(true, crt_gdata.cg_primary_prov); + + D_FREE(crt_gdata.cg_secondary_provs); + D_FREE(crt_gdata.cg_prov_gdata_secondary); unlock: D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); @@ -750,6 +758,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_FREE(iface0); D_FREE(domain0); 
D_FREE(provider_str0); + if (rc != 0) { D_ERROR("failed, "DF_RC"\n", DP_RC(rc)); d_fault_inject_fini(); @@ -785,7 +794,7 @@ crt_finalize(void) crt_self_test_fini(); /* TODO: Needs to happen for every initialized provider */ - prov_data = &crt_gdata.cg_prov_gdata[crt_gdata.cg_primary_prov]; + prov_data = &crt_gdata.cg_prov_gdata_primary; if (prov_data->cpg_ctx_num > 0) { D_ASSERT(!crt_context_empty(crt_gdata.cg_primary_prov, @@ -830,12 +839,15 @@ crt_finalize(void) crt_gdata.cg_inited = 0; gdata_init_flag = 0; - crt_na_config_fini(crt_gdata.cg_primary_prov); + crt_na_config_fini(true, crt_gdata.cg_primary_prov); if (crt_gdata.cg_secondary_provs != NULL) { for (i = 0; i < crt_gdata.cg_num_secondary_provs; i++) - crt_na_config_fini(crt_gdata.cg_secondary_provs[i]); + crt_na_config_fini(false, crt_gdata.cg_secondary_provs[i]); } + + D_FREE(crt_gdata.cg_secondary_provs); + D_FREE(crt_gdata.cg_prov_gdata_secondary); } else { D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); } @@ -1004,7 +1016,8 @@ crt_na_fill_ip_addr(struct crt_na_config *na_cfg) } static int -crt_na_config_init(crt_provider_t provider, char *interface, char *domain, char *port_str) +crt_na_config_init(bool primary, crt_provider_t provider, + char *interface, char *domain, char *port_str) { struct crt_na_config *na_cfg; int rc = 0; @@ -1013,7 +1026,7 @@ crt_na_config_init(crt_provider_t provider, char *interface, char *domain, char if (provider == CRT_PROV_SM) return 0; - na_cfg = &crt_gdata.cg_prov_gdata[provider].cpg_na_config; + na_cfg = crt_provider_get_na_config(primary, provider); D_STRNDUP(na_cfg->noc_interface, interface, 64); if (!na_cfg->noc_interface) D_GOTO(out, rc = -DER_NOMEM); @@ -1057,11 +1070,11 @@ crt_na_config_init(crt_provider_t provider, char *interface, char *domain, char return rc; } -void crt_na_config_fini(crt_provider_t provider) +void crt_na_config_fini(bool primary, crt_provider_t provider) { struct crt_na_config *na_cfg; - na_cfg = &crt_gdata.cg_prov_gdata[provider].cpg_na_config; + 
na_cfg = crt_provider_get_na_config(primary, provider); D_FREE(na_cfg->noc_interface); D_FREE(na_cfg->noc_domain); na_cfg->noc_port = 0; diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 36579e863c2..19f5897b1e9 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -60,6 +60,7 @@ struct crt_prov_gdata { cpg_inited : 1; }; +#define MAX_NUM_SECONDARY_PROVS 2 /* CaRT global data */ struct crt_gdata { @@ -69,7 +70,10 @@ struct crt_gdata { int *cg_secondary_provs; /** Provider specific data */ - struct crt_prov_gdata cg_prov_gdata[CRT_PROV_COUNT]; + struct crt_prov_gdata cg_prov_gdata_primary; + + /** */ + struct crt_prov_gdata *cg_prov_gdata_secondary; /** global timeout value (second) for all RPCs */ uint32_t cg_timeout; @@ -289,6 +293,6 @@ struct crt_opc_map { }; -void crt_na_config_fini(int provider); +void crt_na_config_fini(bool primary, int provider); #endif /* __CRT_INTERNAL_TYPES_H__ */ diff --git a/src/cart/crt_swim.c b/src/cart/crt_swim.c index be4474420b3..ffaa84616d0 100644 --- a/src/cart/crt_swim.c +++ b/src/cart/crt_swim.c @@ -742,7 +742,7 @@ static void crt_swim_update_last_unpack_hlc(struct crt_swim_membs *csm) D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock); - ctx_list = crt_provider_get_ctx_list(crt_gdata.cg_primary_prov); + ctx_list = crt_provider_get_ctx_list(true, crt_gdata.cg_primary_prov); d_list_for_each_entry(ctx, ctx_list, cc_link) { uint64_t hlc = ctx->cc_last_unpack_hlc; diff --git a/src/include/cart/api.h b/src/include/cart/api.h index 17f4a3f65d7..0bd36396b60 100644 --- a/src/include/cart/api.h +++ b/src/include/cart/api.h @@ -2080,6 +2080,22 @@ crt_group_rank_remove(crt_group_t *group, d_rank_t rank); */ int crt_self_uri_get(int tag, char **uri); + +/** + * Retrieve a secondary uri of self for the specified tag. + * The uri must be freed by the user using D_FREE(). 
+ * + * \param[in] idx Secondary provider index + * \param[out] uri Returned uri string This is a NULL terminated + * string of size up to CRT_ADDR_STR_MAX_LEN + * (including the trailing NULL). Must be freed by + * the user. + * + * \return DER_SUCCESS on success, negative value + * on failure. + */ +int crt_self_uri_get_secondary(int idx, char **uri); + /** * Retrieve incarnation of self. * diff --git a/src/tests/ftest/cart/dual_provider_server.c b/src/tests/ftest/cart/dual_provider_server.c index b9b93ee0c8c..aff16da7f08 100644 --- a/src/tests/ftest/cart/dual_provider_server.c +++ b/src/tests/ftest/cart/dual_provider_server.c @@ -263,7 +263,9 @@ int main(int argc, char **argv) D_ERROR("crt_context_uri_get(%d) failed; rc=%d\n", i, rc); error_exit(); } + printf("Secondary context[%d] uri=%s\n", i, uri); + D_FREE(uri); rc = pthread_create(&secondary_progress_thread[i], 0, progress_fn, &secondary_ctx[i]); @@ -272,6 +274,14 @@ int main(int argc, char **argv) } } + rc = crt_self_uri_get_secondary(0, &uri); + if (rc != 0) + error_exit(); + + printf("Secondary uri for context0 = %s\n", uri); + + D_FREE(uri); + rc = crt_proto_register(&my_proto_fmt); if (rc != 0) { D_ERROR("crt_proto_register() failed; rc=%d\n", rc); From b068f36f14339416d075e67903b667a988b280fa Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Mon, 27 Jun 2022 23:45:38 +0800 Subject: [PATCH 13/28] DAOS-9928 engine: incorporate new Cart API (#9484) Incorporate the new Cart API crt_req_src_provider_is_primary(), so that secondary bulk transfer could be supported now. Forwarded secondary RPC is still not supported yet, it requires additional new Cart API. Removed improper assumption on the context ID for secondary context, since engine may have multiple secondary contexts. 
Signed-off-by: Niu Yawei --- src/engine/srv.c | 6 ------ src/object/srv_obj.c | 18 +++++++++++++----- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/engine/srv.c b/src/engine/srv.c index 1a531f26219..7c00c0abced 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -519,12 +519,6 @@ dss_srv_handler(void *arg) goto crt_destroy; } dx->dx_ctx_id = dmi->dmi_ctx_id; - - if (dx->dx_ctx_id != 0) { - D_ERROR("Invalid secondary context ID: %d\n", dx->dx_ctx_id); - rc = -DER_INVAL; - goto crt_destroy; - } } else if (dx->dx_comm) { /* create private transport context */ rc = crt_context_create(&dmi->dmi_ctx); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 6a4e855be04..9af471f7f6d 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -265,14 +265,22 @@ obj_bulk_args_init(struct obj_bulk_args *args) static void * rpc2orig_ctx(crt_rpc_t *rpc, bool *is_primary) { + int rc; + /* * TODO: - * - Use new Cart API to query the original provider is primary or secondary - * through RPC; - * - Use new Cart API to query if remote bulk is originally from secondary or - * primary (for forwarded bulk transfer); + * For forwarded RPC (server always uses primary context to create forward RPC), + * we need some new cart API to query if the RPC is originated from a secondary + * client, considering multiple secondary contexts need be supported, the new + * API may need to return context ID as well. Then IO engine will be able to get + * the proper secondary context by 'is_primary' and 'context id'. */ - *is_primary = true; + rc = crt_req_src_provider_is_primary(rpc, is_primary); + if (rc) { + D_ERROR("Failed to query provider info. 
"DF_RC"\n", DP_RC(rc)); + *is_primary = true; + } + return rpc->cr_ctx; } From 3fe5b697921ee200085a3e48a36839c97436ba83 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Wed, 29 Jun 2022 07:43:20 -0700 Subject: [PATCH 14/28] CART-89 multi_prov: new api, fixes (#9514) - New API crt_get_nr_secondary_providers() added. Returns number of secondary providers initialized by CART. - crt_self_uri_get_secondary() fixed to return -DER_NONEXIST when no secondary providers are initialized Signed-off-by: Alexander A Oganezov --- src/cart/crt_context.c | 11 +++++++++++ src/include/cart/api.h | 8 ++++++++ src/tests/ftest/network/cart_self_test.py | 3 ++- src/tests/ftest/network/cart_self_test.yaml | 6 ------ 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/cart/crt_context.c b/src/cart/crt_context.c index 9f9c750b0bd..f27dd91b78a 100644 --- a/src/cart/crt_context.c +++ b/src/cart/crt_context.c @@ -1329,6 +1329,12 @@ crt_context_idx(crt_context_t crt_ctx, int *ctx_idx) return rc; } +int +crt_get_nr_secondary_providers(void) +{ + return crt_gdata.cg_num_secondary_provs; +} + int crt_self_uri_get_secondary(int secondary_idx, char **uri) { @@ -1339,6 +1345,11 @@ crt_self_uri_get_secondary(int secondary_idx, char **uri) return -DER_NONEXIST; } + if ((crt_gdata.cg_prov_gdata_secondary == NULL) || + (secondary_idx >= crt_gdata.cg_num_secondary_provs)) { + return -DER_NONEXIST; + } + addr = crt_gdata.cg_prov_gdata_secondary[secondary_idx].cpg_addr; D_STRNDUP(*uri, addr, CRT_ADDR_STR_MAX_LEN - 1); diff --git a/src/include/cart/api.h b/src/include/cart/api.h index 0bd36396b60..b78bf28321e 100644 --- a/src/include/cart/api.h +++ b/src/include/cart/api.h @@ -2096,6 +2096,14 @@ int crt_self_uri_get(int tag, char **uri); */ int crt_self_uri_get_secondary(int idx, char **uri); +/** + * Returns number of secondary providers initialized. + * + * \return Number of secondary providers. + */ +int +crt_get_nr_secondary_providers(void); + /** * Retrieve incarnation of self. 
* diff --git a/src/tests/ftest/network/cart_self_test.py b/src/tests/ftest/network/cart_self_test.py index b9df79f0195..482525fe525 100755 --- a/src/tests/ftest/network/cart_self_test.py +++ b/src/tests/ftest/network/cart_self_test.py @@ -55,7 +55,8 @@ def __init__(self, *args, **kwargs): def setUp(self): """Set up each test case.""" super().setUp() - share_addr = self.params.get("share_addr", "/run/test_params/*") + # Most providers (tcp, verbs) don't support SEP mode + share_addr = 0 # Configure the daos server self.add_server_manager() diff --git a/src/tests/ftest/network/cart_self_test.yaml b/src/tests/ftest/network/cart_self_test.yaml index a5a7eda3b1d..fccc9e240d1 100644 --- a/src/tests/ftest/network/cart_self_test.yaml +++ b/src/tests/ftest/network/cart_self_test.yaml @@ -25,9 +25,3 @@ self_test: message_sizes: "\"0 b1048576\"" large_io_bulk_get: message_sizes: "\"b1048576 0\"" -test_params: - share_addr_mux: !mux - on: - share_addr: 1 - off: - share_addr: 0 From 10833df9e6ca0071c6dae904ab46b13da1802983 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Thu, 7 Jul 2022 08:48:05 -0600 Subject: [PATCH 15/28] DAOS-10897 control: Add plumbing for secondary URIs (#9483) * DAOS-10897 control: Add plumbing for secondary URIs Secondary URIs need to be passed from the engine to its control plane server in NotifyReady, and from there to the MS in the Join request. 
Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/attachinfo.go | 42 ++++++++-- src/control/cmd/daos_agent/mgmt_rpc.go | 6 ++ src/control/common/proto/srv/srv.pb.go | 98 +++++++++++++----------- src/control/lib/control/system.go | 17 ++-- src/control/server/instance.go | 21 ++--- src/control/server/mgmt_system.go | 13 +++- src/engine/drpc_client.c | 32 +++++++- src/engine/srv.pb-c.c | 23 ++++-- src/engine/srv.pb-c.h | 13 +++- src/proto/srv/srv.proto | 7 +- 10 files changed, 190 insertions(+), 82 deletions(-) diff --git a/src/control/cmd/daos_agent/attachinfo.go b/src/control/cmd/daos_agent/attachinfo.go index d3344b3e084..0bf1362490f 100644 --- a/src/control/cmd/daos_agent/attachinfo.go +++ b/src/control/cmd/daos_agent/attachinfo.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -20,8 +20,9 @@ import ( type dumpAttachInfoCmd struct { configCmd ctlInvokerCmd - Output string `short:"o" long:"output" default:"stdout" description:"Dump output to this location"` - JSON bool `short:"j" long:"json" description:"Enable JSON output"` + Output string `short:"o" long:"output" default:"stdout" description:"Dump output to this location"` + JSON bool `short:"j" long:"json" description:"Enable JSON output"` + ProviderIdx uint `short:"n" long:"provider_idx" description:"Index of provider to fetch (if multiple)"` } func (cmd *dumpAttachInfoCmd) Execute(_ []string) error { @@ -55,6 +56,11 @@ func (cmd *dumpAttachInfoCmd) Execute(_ []string) error { return err } + ranks, err := getServiceRanksForProviderIdx(resp, int(cmd.ProviderIdx)) + if err != nil { + return err + } + /** * cart/crt_group.c:crt_group_config_save() * @@ -78,11 +84,37 @@ func (cmd *dumpAttachInfoCmd) Execute(_ []string) error { */ ew := txtfmt.NewErrWriter(out) fmt.Fprintf(ew, "name %s\n", cmd.cfg.SystemName) - fmt.Fprintf(ew, "size %d\n", len(resp.ServiceRanks)) + fmt.Fprintf(ew, "size 
%d\n", len(ranks)) fmt.Fprintln(ew, "all") - for _, psr := range resp.ServiceRanks { + for _, psr := range ranks { fmt.Fprintf(ew, "%d %s\n", psr.Rank, psr.Uri) } return ew.Err } + +func getServiceRanksForProviderIdx(inResp *control.GetAttachInfoResp, idx int) ([]*control.PrimaryServiceRank, error) { + if idx == 0 { + // Primary provider + return inResp.ServiceRanks, nil + } + + secIdx := idx - 1 + if secIdx < 0 || secIdx >= len(inResp.AlternateClientNetHints) { + return nil, errors.Errorf("provider index must be in range 0 <= idx <= %d", len(inResp.AlternateClientNetHints)) + } + + hint := inResp.AlternateClientNetHints[secIdx] + ranks := make([]*control.PrimaryServiceRank, 0) + for _, r := range inResp.AlternateServiceRanks { + if r.Provider == hint.Provider { + ranks = append(ranks, r) + } + } + + if len(ranks) == 0 { + return nil, errors.Errorf("no ranks for provider %q (idx %d)", hint.Provider, idx) + } + + return ranks, nil +} diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index d5ba119b86b..034934f3ff4 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -169,6 +169,8 @@ func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, req *mgm return nil, err } + mod.log.Debugf("full GetAttachInfoResp: %+v", rawResp) + reqProviders := mod.getInterfaceProviders(req.Interface, req.Domain) resp, err := mod.selectAttachInfo(rawResp, reqProviders) @@ -246,6 +248,8 @@ func (mod *mgmtModule) selectAttachInfo(srvResp *mgmtpb.GetAttachInfoResp, reqPr mod.log.Error("communications on this interface may fail") } providers = common.NewStringSet(mod.provider) + + mod.log.Debugf("using configured provider: %s", mod.provider) } if len(providers) == 0 { @@ -258,6 +262,7 @@ func (mod *mgmtModule) selectAttachInfo(srvResp *mgmtpb.GetAttachInfoResp, reqPr for _, hint := range srvResp.SecondaryClientNetHints { if providers.Has(hint.Provider) { + mod.log.Debugf("getting secondary 
provider %s URIs", hint.Provider) uris, err := mod.getProviderURIs(srvResp, hint.Provider) if err == nil { return &mgmtpb.GetAttachInfoResp{ @@ -267,6 +272,7 @@ func (mod *mgmtModule) selectAttachInfo(srvResp *mgmtpb.GetAttachInfoResp, reqPr ClientNetHint: hint, }, nil } + mod.log.Error(err.Error()) } } diff --git a/src/control/common/proto/srv/srv.pb.go b/src/control/common/proto/srv/srv.pb.go index fae0118c009..0ec4d271ba1 100644 --- a/src/control/common/proto/srv/srv.pb.go +++ b/src/control/common/proto/srv/srv.pb.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2021 Intel Corporation. +// (C) Copyright 2019-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,8 +8,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.26.0 -// protoc v3.12.4 +// protoc-gen-go v1.28.0 +// protoc v3.5.0 // source: srv/srv.proto package srv @@ -33,12 +33,13 @@ type NotifyReadyReq struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Uri string `protobuf:"bytes,1,opt,name=uri,proto3" json:"uri,omitempty"` // CaRT URI - Nctxs uint32 `protobuf:"varint,2,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Number of CaRT contexts - DrpcListenerSock string `protobuf:"bytes,3,opt,name=drpcListenerSock,proto3" json:"drpcListenerSock,omitempty"` // Path to I/O Engine's dRPC listener socket - InstanceIdx uint32 `protobuf:"varint,4,opt,name=instanceIdx,proto3" json:"instanceIdx,omitempty"` // I/O Engine instance index - Ntgts uint32 `protobuf:"varint,5,opt,name=ntgts,proto3" json:"ntgts,omitempty"` // number of VOS targets allocated in I/O Engine - Incarnation uint64 `protobuf:"varint,6,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // HLC incarnation number + Uri string `protobuf:"bytes,1,opt,name=uri,proto3" json:"uri,omitempty"` // Primary CaRT URI + Nctxs uint32 `protobuf:"varint,2,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Number of primary CaRT contexts + DrpcListenerSock string 
`protobuf:"bytes,3,opt,name=drpcListenerSock,proto3" json:"drpcListenerSock,omitempty"` // Path to I/O Engine's dRPC listener socket + InstanceIdx uint32 `protobuf:"varint,4,opt,name=instanceIdx,proto3" json:"instanceIdx,omitempty"` // I/O Engine instance index + Ntgts uint32 `protobuf:"varint,5,opt,name=ntgts,proto3" json:"ntgts,omitempty"` // number of VOS targets allocated in I/O Engine + Incarnation uint64 `protobuf:"varint,6,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // HLC incarnation number + SecondaryUris []string `protobuf:"bytes,7,rep,name=secondaryUris,proto3" json:"secondaryUris,omitempty"` // secondary CaRT URIs } func (x *NotifyReadyReq) Reset() { @@ -115,6 +116,13 @@ func (x *NotifyReadyReq) GetIncarnation() uint64 { return 0 } +func (x *NotifyReadyReq) GetSecondaryUris() []string { + if x != nil { + return x.SecondaryUris + } + return nil +} + type BioErrorReq struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -426,7 +434,7 @@ var File_srv_srv_proto protoreflect.FileDescriptor var file_srv_srv_proto_rawDesc = []byte{ 0x0a, 0x0d, 0x73, 0x72, 0x76, 0x2f, 0x73, 0x72, 0x76, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x03, 0x73, 0x72, 0x76, 0x22, 0xbe, 0x01, 0x0a, 0x0e, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x52, + 0x03, 0x73, 0x72, 0x76, 0x22, 0xe4, 0x01, 0x0a, 0x0e, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x52, 0x65, 0x61, 0x64, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x12, @@ -438,40 +446,42 @@ var file_srv_srv_proto_rawDesc = []byte{ 0x05, 0x6e, 0x74, 0x67, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6e, 0x74, 0x67, 0x74, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 
0x61, 0x72, 0x6e, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xd5, 0x01, 0x0a, 0x0b, 0x42, 0x69, 0x6f, 0x45, 0x72, 0x72, - 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x1a, 0x0a, 0x08, 0x75, 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, - 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x75, 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, - 0x72, 0x12, 0x18, 0x0a, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, 0x72, 0x72, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, 0x72, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x77, - 0x72, 0x69, 0x74, 0x65, 0x45, 0x72, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x77, - 0x72, 0x69, 0x74, 0x65, 0x45, 0x72, 0x72, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x67, 0x74, 0x49, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x74, 0x67, 0x74, 0x49, 0x64, 0x12, 0x20, 0x0a, - 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 0x78, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 0x78, 0x12, - 0x2a, 0x0a, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, - 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, - 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, - 0x72, 0x69, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x23, 0x0a, - 0x0d, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, 0x52, 0x65, 0x71, 0x12, 0x12, - 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x22, 0x42, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, - 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, - 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, - 0x76, 0x63, 0x72, 
0x65, 0x70, 0x73, 0x22, 0x2a, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, - 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x71, 0x12, 0x14, 0x0a, 0x05, - 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, - 0x65, 0x6c, 0x22, 0x5b, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, - 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x42, - 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, - 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, - 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, - 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x72, 0x76, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, + 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x65, + 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x22, 0xd5, 0x01, 0x0a, 0x0b, + 0x42, 0x69, 0x6f, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x1a, 0x0a, 0x08, 0x75, + 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x75, + 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, + 0x72, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, 0x72, + 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x77, 0x72, 
0x69, 0x74, 0x65, 0x45, 0x72, 0x72, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x08, 0x77, 0x72, 0x69, 0x74, 0x65, 0x45, 0x72, 0x72, 0x12, 0x14, 0x0a, + 0x05, 0x74, 0x67, 0x74, 0x49, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x74, 0x67, + 0x74, 0x49, 0x64, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, + 0x64, 0x78, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, + 0x63, 0x65, 0x49, 0x64, 0x78, 0x12, 0x2a, 0x0a, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, + 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, + 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x75, 0x72, 0x69, 0x22, 0x23, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, + 0x63, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0x42, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x50, + 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x22, 0x2a, 0x0a, 0x12, + 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, + 0x65, 0x71, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x22, 0x5b, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, + 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, + 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 
0x28, 0x05, 0x52, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x73, + 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, + 0x63, 0x72, 0x65, 0x70, 0x73, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, + 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, + 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x72, 0x76, + 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/control/system.go b/src/control/lib/control/system.go index 708686c3651..b1ff5eff6c6 100644 --- a/src/control/lib/control/system.go +++ b/src/control/lib/control/system.go @@ -99,14 +99,15 @@ type SystemJoinReq struct { unaryRequest msRequest retryableRequest - ControlAddr *net.TCPAddr - UUID string - Rank system.Rank - URI string - NumContexts uint32 `json:"Nctxs"` - FaultDomain *system.FaultDomain `json:"SrvFaultDomain"` - InstanceIdx uint32 `json:"Idx"` - Incarnation uint64 `json:"Incarnation"` + ControlAddr *net.TCPAddr + UUID string + Rank system.Rank + URI string + SecondaryURIs []string `json:"secondary_uris"` + NumContexts uint32 `json:"Nctxs"` + FaultDomain *system.FaultDomain `json:"SrvFaultDomain"` + InstanceIdx uint32 `json:"Idx"` + Incarnation uint64 `json:"Incarnation"` } // MarshalJSON packs SystemJoinResp struct into a JSON message. 
diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 904a5264131..32ec0eb5de5 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -188,15 +188,18 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify r = *superblock.Rank } - resp, err := ei.joinSystem(ctx, &control.SystemJoinReq{ - UUID: superblock.UUID, - Rank: r, - URI: ready.GetUri(), - NumContexts: ready.GetNctxs(), - FaultDomain: ei.hostFaultDomain, - InstanceIdx: ei.Index(), - Incarnation: ready.GetIncarnation(), - }) + joinReq := &control.SystemJoinReq{ + UUID: superblock.UUID, + Rank: r, + URI: ready.GetUri(), + SecondaryURIs: ready.GetSecondaryUris(), + NumContexts: ready.GetNctxs(), + FaultDomain: ei.hostFaultDomain, + InstanceIdx: ei.Index(), + Incarnation: ready.GetIncarnation(), + } + + resp, err := ei.joinSystem(ctx, joinReq) if err != nil { ei.log.Errorf("join failed: %s", err) return system.NilRank, false, err diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 44181f22762..e71f0c44836 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -71,6 +71,15 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo } } + getPrimaryProvider := func() string { + return svc.clientNetworkHint[0].Provider + } + + getSecondaryProvider := func(idx int) string { + // Primary is at idx 0, secondary providers start afterward + return svc.clientNetworkHint[idx+1].Provider + } + for rank, uris := range rankURIs { if len(svc.clientNetworkHint) < len(uris.Secondary)+1 { return nil, errors.Errorf("not enough client network hints (%d) for rank %d URIs (%d)", @@ -80,14 +89,14 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo resp.RankUris = append(resp.RankUris, &mgmtpb.GetAttachInfoResp_RankUri{ Rank: rank.Uint32(), Uri: uris.Primary, - Provider: svc.clientNetworkHint[0].Provider, + Provider: 
getPrimaryProvider(), }) for i, uri := range uris.Secondary { rankURI := &mgmtpb.GetAttachInfoResp_RankUri{ Rank: rank.Uint32(), Uri: uri, - Provider: svc.clientNetworkHint[i].Provider, + Provider: getSecondaryProvider(i), } resp.SecondaryRankUris = append(resp.SecondaryRankUris, rankURI) diff --git a/src/engine/drpc_client.c b/src/engine/drpc_client.c index 9be829e0f11..85efbf8a98d 100644 --- a/src/engine/drpc_client.c +++ b/src/engine/drpc_client.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2021 Intel Corporation. + * (C) Copyright 2019-2022 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -162,7 +162,10 @@ drpc_notify_ready(void) size_t reqb_size; Drpc__Response *dresp; uint64_t incarnation; + size_t nr_sec_uris; + char **sec_uris = NULL; int rc; + int i; rc = crt_self_uri_get(0 /* tag */, &req.uri); if (rc != 0) @@ -171,6 +174,27 @@ drpc_notify_ready(void) if (rc != 0) goto out_uri; + nr_sec_uris = crt_get_nr_secondary_providers(); + if (nr_sec_uris > 0) { + D_ALLOC_ARRAY(sec_uris, nr_sec_uris); + if (sec_uris == NULL) + D_GOTO(out_uri, rc = -DER_NOMEM); + for (i = 0; i < nr_sec_uris; i++) { + rc = crt_self_uri_get_secondary(i, &sec_uris[i]); + if (rc != 0) { + D_ERROR("failed to get secondary provider URI, idx=%d, rc=%d", + i, rc); + nr_sec_uris = i; + goto out_sec_uri; + } + D_DEBUG(DB_MGMT, "secondary provider URI: %s\n", sec_uris[i]); + } + + D_DEBUG(DB_MGMT, "setting secondary provider URIs"); + req.secondaryuris = sec_uris; + req.n_secondaryuris = nr_sec_uris; + } + req.incarnation = incarnation; req.nctxs = DSS_CTX_NR_TOTAL; /* Do not free, this string is managed by the dRPC listener */ @@ -181,7 +205,7 @@ drpc_notify_ready(void) reqb_size = srv__notify_ready_req__get_packed_size(&req); D_ALLOC(reqb, reqb_size); if (reqb == NULL) - D_GOTO(out_uri, rc = -DER_NOMEM); + D_GOTO(out_sec_uri, rc = -DER_NOMEM); srv__notify_ready_req__pack(&req, reqb); rc = dss_drpc_call(DRPC_MODULE_SRV, DRPC_METHOD_SRV_NOTIFY_READY, reqb, @@ -197,6 
+221,10 @@ drpc_notify_ready(void) drpc_response_free(dresp); out_reqb: D_FREE(reqb); +out_sec_uri: + for (i = 0; i < nr_sec_uris; i++) + D_FREE(sec_uris[i]); + D_FREE(sec_uris); out_uri: D_FREE(req.uri); out: diff --git a/src/engine/srv.pb-c.c b/src/engine/srv.pb-c.c index c182463a5e5..ffa2348c544 100644 --- a/src/engine/srv.pb-c.c +++ b/src/engine/srv.pb-c.c @@ -277,7 +277,7 @@ void srv__pool_find_by_label_resp__free_unpacked assert(message->base.descriptor == &srv__pool_find_by_label_resp__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[6] = +static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[7] = { { "uri", @@ -351,6 +351,18 @@ static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[6 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "secondaryUris", + 7, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Srv__NotifyReadyReq, n_secondaryuris), + offsetof(Srv__NotifyReadyReq, secondaryuris), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned srv__notify_ready_req__field_indices_by_name[] = { 2, /* field[2] = drpcListenerSock */ @@ -358,12 +370,13 @@ static const unsigned srv__notify_ready_req__field_indices_by_name[] = { 3, /* field[3] = instanceIdx */ 1, /* field[1] = nctxs */ 4, /* field[4] = ntgts */ + 6, /* field[6] = secondaryUris */ 0, /* field[0] = uri */ }; static const ProtobufCIntRange srv__notify_ready_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 6 } + { 0, 7 } }; const ProtobufCMessageDescriptor srv__notify_ready_req__descriptor = { @@ -373,7 +386,7 @@ const ProtobufCMessageDescriptor srv__notify_ready_req__descriptor = "Srv__NotifyReadyReq", "srv", sizeof(Srv__NotifyReadyReq), - 6, + 7, srv__notify_ready_req__field_descriptors, 
srv__notify_ready_req__field_indices_by_name, 1, srv__notify_ready_req__number_ranges, @@ -557,7 +570,7 @@ static const ProtobufCFieldDescriptor srv__get_pool_svc_resp__field_descriptors[ offsetof(Srv__GetPoolSvcResp, svcreps), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, }; @@ -658,7 +671,7 @@ static const ProtobufCFieldDescriptor srv__pool_find_by_label_resp__field_descri offsetof(Srv__PoolFindByLabelResp, svcreps), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, }; diff --git a/src/engine/srv.pb-c.h b/src/engine/srv.pb-c.h index 603b0daaf60..902cd15fa49 100644 --- a/src/engine/srv.pb-c.h +++ b/src/engine/srv.pb-c.h @@ -10,7 +10,7 @@ PROTOBUF_C__BEGIN_DECLS #if PROTOBUF_C_VERSION_NUMBER < 1003000 # error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers. -#elif 1003003 < PROTOBUF_C_MIN_COMPILER_VERSION +#elif 1003000 < PROTOBUF_C_MIN_COMPILER_VERSION # error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c. 
#endif @@ -32,11 +32,11 @@ struct _Srv__NotifyReadyReq { ProtobufCMessage base; /* - * CaRT URI + * Primary CaRT URI */ char *uri; /* - * Number of CaRT contexts + * Number of primary CaRT contexts */ uint32_t nctxs; /* @@ -55,10 +55,15 @@ struct _Srv__NotifyReadyReq * HLC incarnation number */ uint64_t incarnation; + /* + * secondary CaRT URIs + */ + size_t n_secondaryuris; + char **secondaryuris; }; #define SRV__NOTIFY_READY_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&srv__notify_ready_req__descriptor) \ - , (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, 0, 0 } + , (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, 0, 0, 0,NULL } struct _Srv__BioErrorReq diff --git a/src/proto/srv/srv.proto b/src/proto/srv/srv.proto index c63a402501c..28393ac4cad 100644 --- a/src/proto/srv/srv.proto +++ b/src/proto/srv/srv.proto @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2021 Intel Corporation. +// (C) Copyright 2019-2022 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -12,12 +12,13 @@ package srv; option go_package = "github.com/daos-stack/daos/src/control/common/proto/srv"; message NotifyReadyReq { - string uri = 1; // CaRT URI - uint32 nctxs = 2; // Number of CaRT contexts + string uri = 1; // Primary CaRT URI + uint32 nctxs = 2; // Number of primary CaRT contexts string drpcListenerSock = 3; // Path to I/O Engine's dRPC listener socket uint32 instanceIdx = 4; // I/O Engine instance index uint32 ntgts = 5; // number of VOS targets allocated in I/O Engine uint64 incarnation = 6; // HLC incarnation number + repeated string secondaryUris = 7; // secondary CaRT URIs } // NotifyReadyResp is nil. 
From 7fcd5be712189b8aed8e5fd37f71cb061e24f981 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Thu, 7 Jul 2022 22:55:39 -0700 Subject: [PATCH 16/28] CART-89 bug: Fix provider settings (#9644) - Fix a bug that caused provider settings (like sep override) to not take effect due to mixed arguments Signed-off-by: Alexander A Oganezov --- src/cart/crt_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 0b927841051..2a9295b4072 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -441,7 +441,7 @@ check_grpid(crt_group_id_t grpid) } static int -prov_settings_apply(crt_provider_t prov, bool primary, crt_init_options_t *opt) +prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) { char *srx_env; int rc = 0; From 5cda9a755ff5dac2569e920d52bf0fa7abe496bf Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Mon, 11 Jul 2022 12:28:21 -0600 Subject: [PATCH 17/28] DAOS-11027 control: Set client secondary provider env (#9559) - Include a provider index in each provider's ClientNetHint in GetAttachInfo. This index corresponds to the position in the list of providers, starting with 0 for the primary provider. - The provider index is used to set the environment variable CRT_SECONDARY_PROVIDER in the client lib. 
Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/mgmt_rpc.go | 2 + src/control/common/proto/mgmt/svc.pb.go | 113 +++++++++++++----------- src/control/lib/control/network.go | 1 + src/control/server/server.go | 1 + src/include/daos/mgmt.h | 1 + src/mgmt/cli_mgmt.c | 16 +++- src/mgmt/svc.pb-c.c | 21 ++++- src/mgmt/svc.pb-c.h | 8 +- src/proto/mgmt/svc.proto | 3 +- 9 files changed, 104 insertions(+), 62 deletions(-) diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index 034934f3ff4..ee6a39734ea 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -178,6 +178,8 @@ func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, req *mgm return nil, err } + mod.log.Debugf("provider idx: %d", resp.ClientNetHint.ProviderIdx) + // Requested fabric interface/domain behave as a simple override. If we weren't able to // validate them, we return them to the user with the understanding that perhaps the user // knows what they're doing. diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 3098d03b5e1..1a719e80034 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,8 +6,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.26.0 -// protoc v3.6.1 +// protoc-gen-go v1.28.0 +// protoc v3.5.0 // source: mgmt/svc.proto package mgmt @@ -605,7 +605,8 @@ type ClientNetHint struct { CrtTimeout uint32 `protobuf:"varint,5,opt,name=crt_timeout,json=crtTimeout,proto3" json:"crt_timeout,omitempty"` // CaRT CRT_TIMEOUT NetDevClass uint32 `protobuf:"varint,6,opt,name=net_dev_class,json=netDevClass,proto3" json:"net_dev_class,omitempty"` // ARP protocol hardware identifier of the // I/O Engine network interface - SrvSrxSet int32 `protobuf:"varint,7,opt,name=srv_srx_set,json=srvSrxSet,proto3" json:"srv_srx_set,omitempty"` // Server SRX setting (-1, 0, 1; -1 == unset) + SrvSrxSet int32 `protobuf:"varint,7,opt,name=srv_srx_set,json=srvSrxSet,proto3" json:"srv_srx_set,omitempty"` // Server SRX setting (-1, 0, 1; -1 == unset) + ProviderIdx uint32 `protobuf:"varint,8,opt,name=provider_idx,json=providerIdx,proto3" json:"provider_idx,omitempty"` // Provider index - anything > 0 is a secondary provider } func (x *ClientNetHint) Reset() { @@ -689,6 +690,13 @@ func (x *ClientNetHint) GetSrvSrxSet() int32 { return 0 } +func (x *ClientNetHint) GetProviderIdx() uint32 { + if x != nil { + return x.ProviderIdx + } + return 0 +} + type GetAttachInfoResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1170,7 +1178,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, - 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, 0xf3, 0x01, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, + 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, 0x96, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 
0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, @@ -1185,53 +1193,56 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x22, 0xb1, 0x03, 0x0a, - 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, - 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, - 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, - 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, - 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, - 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, - 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, - 0x6d, 0x67, 
0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, - 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x11, - 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, - 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x63, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x73, 0x18, - 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, - 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, 0x65, 0x63, 0x6f, - 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, - 0x6e, 0x74, 0x73, 0x1a, 0x4b, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, - 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x75, 0x72, 0x69, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, - 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, - 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, 0x67, 0x52, - 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x20, 0x0a, 0x0a, 0x53, 0x65, - 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x7c, 0x0a, 0x0e, - 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 
0x74, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, - 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, - 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, - 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, - 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, - 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x21, 0x0a, 0x0c, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, + 0x28, 0x0d, 0x52, 0x0b, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, 0x78, 0x22, + 0xb1, 0x03, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, + 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, + 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, + 
0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, + 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, + 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, + 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, + 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, + 0x69, 0x6e, 0x74, 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, + 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, + 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, + 0x69, 0x52, 0x11, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, + 0x55, 0x72, 0x69, 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, + 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, + 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, + 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, + 0x74, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x1a, 0x4b, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, + 0x69, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, + 0x64, 0x65, 0x72, 0x18, 0x03, 0x20, 
0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, + 0x64, 0x65, 0x72, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, + 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, + 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x20, 0x0a, + 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, + 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, + 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, + 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, + 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, + 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, + 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, + 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, } var ( diff --git a/src/control/lib/control/network.go 
b/src/control/lib/control/network.go index 760a715c6da..3d63f435cd4 100644 --- a/src/control/lib/control/network.go +++ b/src/control/lib/control/network.go @@ -222,6 +222,7 @@ type ( CrtTimeout uint32 `json:"crt_timeout"` NetDevClass uint32 `json:"net_dev_class"` SrvSrxSet int32 `json:"srv_srx_set"` + ProviderIdx uint32 `json:"provider_idx"` } GetAttachInfoResp struct { diff --git a/src/control/server/server.go b/src/control/server/server.go index 41375ef2065..c206a3715be 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -414,6 +414,7 @@ func (srv *server) setupGrpc() error { CrtTimeout: srv.cfg.Fabric.CrtTimeout, NetDevClass: uint32(srv.netDevClass[i]), SrvSrxSet: srxSetting, + ProviderIdx: uint32(i), }) } srv.mgmtSvc.clientNetworkHint = clientNetHints diff --git a/src/include/daos/mgmt.h b/src/include/daos/mgmt.h index 792532c860c..df698950098 100644 --- a/src/include/daos/mgmt.h +++ b/src/include/daos/mgmt.h @@ -39,6 +39,7 @@ struct dc_mgmt_sys_info { uint32_t crt_timeout; int32_t srv_srx_set; d_rank_list_t *ms_ranks; + uint32_t provider_idx; /* Provider index (if more than one available) */ }; /** Client system handle */ diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c index 20b145dfc80..14dfd32309c 100644 --- a/src/mgmt/cli_mgmt.c +++ b/src/mgmt/cli_mgmt.c @@ -178,13 +178,15 @@ fill_sys_info(Mgmt__GetAttachInfoResp *resp, struct dc_mgmt_sys_info *info) info->ms_ranks->rl_ranks[i]); } + info->provider_idx = resp->client_net_hint->provider_idx; + D_DEBUG(DB_MGMT, "GetAttachInfo Provider: %s, Interface: %s, Domain: %s," "CRT_CTX_SHARE_ADDR: %u, CRT_TIMEOUT: %u, " - "FI_OFI_RXM_USE_SRX: %d\n", + "FI_OFI_RXM_USE_SRX: %d, CRT_SECONDARY_PROVIDER: %d\n", info->provider, info->interface, info->domain, info->crt_ctx_share_addr, info->crt_timeout, - info->srv_srx_set); + info->srv_srx_set, info->provider_idx); return 0; } @@ -407,13 +409,19 @@ int dc_mgmt_net_cfg(const char *name) if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); 
+ sprintf(buf, "%d", info.provider_idx); + rc = setenv("CRT_SECONDARY_PROVIDER", buf, 1); + if (rc != 0) + D_GOTO(cleanup, rc = d_errno2der(errno)); + D_DEBUG(DB_MGMT, "CaRT initialization with:\n" "\tOFI_INTERFACE=%s, OFI_DOMAIN: %s, CRT_PHY_ADDR_STR: %s, " - "CRT_CTX_SHARE_ADDR: %s, CRT_TIMEOUT: %s\n", + "CRT_CTX_SHARE_ADDR: %s, CRT_TIMEOUT: %s, CRT_SECONDARY_PROVIDER: %s\n", getenv("OFI_INTERFACE"), getenv("OFI_DOMAIN"), getenv("CRT_PHY_ADDR_STR"), - getenv("CRT_CTX_SHARE_ADDR"), getenv("CRT_TIMEOUT")); + getenv("CRT_CTX_SHARE_ADDR"), getenv("CRT_TIMEOUT"), + getenv("CRT_SECONDARY_PROVIDER")); cleanup: put_attach_info(&info, resp); diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index c9f943153e3..f549232f618 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -1266,7 +1266,7 @@ const ProtobufCMessageDescriptor mgmt__get_attach_info_req__descriptor = (ProtobufCMessageInit) mgmt__get_attach_info_req__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__client_net_hint__field_descriptors[7] = +static const ProtobufCFieldDescriptor mgmt__client_net_hint__field_descriptors[8] = { { "provider", @@ -1352,6 +1352,18 @@ static const ProtobufCFieldDescriptor mgmt__client_net_hint__field_descriptors[7 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "provider_idx", + 8, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__ClientNetHint, provider_idx), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__client_net_hint__field_indices_by_name[] = { 3, /* field[3] = crt_ctx_share_addr */ @@ -1360,12 +1372,13 @@ static const unsigned mgmt__client_net_hint__field_indices_by_name[] = { 1, /* field[1] = interface */ 5, /* field[5] = net_dev_class */ 0, /* field[0] = provider */ + 7, /* field[7] = provider_idx */ 6, /* field[6] = srv_srx_set */ }; static const ProtobufCIntRange 
mgmt__client_net_hint__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 7 } + { 0, 8 } }; const ProtobufCMessageDescriptor mgmt__client_net_hint__descriptor = { @@ -1375,7 +1388,7 @@ const ProtobufCMessageDescriptor mgmt__client_net_hint__descriptor = "Mgmt__ClientNetHint", "mgmt", sizeof(Mgmt__ClientNetHint), - 7, + 8, mgmt__client_net_hint__field_descriptors, mgmt__client_net_hint__field_indices_by_name, 1, mgmt__client_net_hint__number_ranges, @@ -1481,7 +1494,7 @@ static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descript offsetof(Mgmt__GetAttachInfoResp, ms_ranks), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 5c384cb0616..300e45c77b5 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -10,7 +10,7 @@ PROTOBUF_C__BEGIN_DECLS #if PROTOBUF_C_VERSION_NUMBER < 1003000 # error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers. -#elif 1003003 < PROTOBUF_C_MIN_COMPILER_VERSION +#elif 1003000 < PROTOBUF_C_MIN_COMPILER_VERSION # error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c. 
#endif @@ -264,10 +264,14 @@ struct _Mgmt__ClientNetHint * Server SRX setting (-1, 0, 1; -1 == unset) */ int32_t srv_srx_set; + /* + * Provider index - anything > 0 is a secondary provider + */ + uint32_t provider_idx; }; #define MGMT__CLIENT_NET_HINT__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__client_net_hint__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, 0 } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, 0, 0 } struct _Mgmt__GetAttachInfoResp__RankUri diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 38c137dc1b7..2abf7084029 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -79,7 +79,8 @@ message ClientNetHint { uint32 crt_timeout = 5; // CaRT CRT_TIMEOUT uint32 net_dev_class = 6; // ARP protocol hardware identifier of the // I/O Engine network interface - int32 srv_srx_set = 7; // Server SRX setting (-1, 0, 1; -1 == unset) + int32 srv_srx_set = 7; // Server SRX setting (-1, 0, 1; -1 == unset) + uint32 provider_idx = 8; // Provider index - anything > 0 is a secondary provider } message GetAttachInfoResp { From 635c0ac8b0375226f483717b03ac04216f913cb8 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Wed, 13 Jul 2022 09:13:40 -0600 Subject: [PATCH 18/28] DAOS-11038 control: Switch to using provider_idx (#9648) * DAOS-11038 control: Switch to using provider_idx - Instead of "provider" use "provider_idx" in the agent config to select secondary provider. This covers the case where we have multiple networks using the same provider. - Add list of providers with corresponding provider_idx to dmg system query command. - Add number of secondary ctxs to the GetAttachInfoResp. 
Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/attachinfo.go | 2 +- src/control/cmd/daos_agent/config.go | 2 +- src/control/cmd/daos_agent/config_test.go | 4 +- src/control/cmd/daos_agent/mgmt_rpc.go | 124 +++++++------ src/control/cmd/daos_agent/mgmt_rpc_test.go | 154 ++++++++-------- src/control/cmd/daos_agent/start.go | 2 +- src/control/cmd/dmg/pretty/system.go | 24 ++- src/control/cmd/dmg/pretty/system_test.go | 22 +++ src/control/common/proto/mgmt/svc.pb.go | 191 +++++++++++--------- src/control/common/proto/mgmt/system.pb.go | 160 ++++++++-------- src/control/common/proto/srv/srv.pb.go | 92 +++++----- src/control/lib/control/network.go | 6 +- src/control/lib/control/system.go | 22 ++- src/control/server/instance.go | 17 +- src/control/server/mgmt_system.go | 24 +-- src/control/server/mgmt_system_test.go | 90 +++++++-- src/engine/drpc_client.c | 17 +- src/engine/srv.pb-c.c | 19 +- src/engine/srv.pb-c.h | 7 +- src/engine/tests/drpc_client_tests.c | 1 + src/mgmt/svc.pb-c.c | 29 ++- src/mgmt/svc.pb-c.h | 11 +- src/proto/mgmt/svc.proto | 3 +- src/proto/mgmt/system.proto | 1 + src/proto/srv/srv.proto | 1 + utils/config/daos_agent.yml | 21 ++- 26 files changed, 626 insertions(+), 420 deletions(-) diff --git a/src/control/cmd/daos_agent/attachinfo.go b/src/control/cmd/daos_agent/attachinfo.go index 0bf1362490f..ed8ac79ff24 100644 --- a/src/control/cmd/daos_agent/attachinfo.go +++ b/src/control/cmd/daos_agent/attachinfo.go @@ -107,7 +107,7 @@ func getServiceRanksForProviderIdx(inResp *control.GetAttachInfoResp, idx int) ( hint := inResp.AlternateClientNetHints[secIdx] ranks := make([]*control.PrimaryServiceRank, 0) for _, r := range inResp.AlternateServiceRanks { - if r.Provider == hint.Provider { + if r.ProviderIdx == hint.ProviderIdx { ranks = append(ranks, r) } } diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index ad7d95f80f0..6f8792f738c 100644 --- a/src/control/cmd/daos_agent/config.go +++ 
b/src/control/cmd/daos_agent/config.go @@ -36,7 +36,7 @@ type Config struct { DisableCache bool `yaml:"disable_caching,omitempty"` DisableAutoEvict bool `yaml:"disable_auto_evict,omitempty"` FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"` - Provider string `yaml:"provider,omitempty"` + ProviderIdx uint `yaml:"provider_idx"` } // NUMAFabricConfig defines a list of fabric interfaces that belong to a NUMA diff --git a/src/control/cmd/daos_agent/config_test.go b/src/control/cmd/daos_agent/config_test.go index dd668cf7fcc..8e08f71d1fd 100644 --- a/src/control/cmd/daos_agent/config_test.go +++ b/src/control/cmd/daos_agent/config_test.go @@ -65,7 +65,7 @@ fabric_ifaces: - iface: ib3 domain: mlx5_3 -provider: ofi+tcp +provider_idx: 1 `) badLogMaskCfg := test.CreateTestFile(t, dir, ` @@ -161,7 +161,7 @@ transport_config: }, }, }, - Provider: "ofi+tcp", + ProviderIdx: 1, }, }, } { diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index ee6a39734ea..c3202f760f0 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -8,6 +8,7 @@ package main import ( "net" + "strings" "sync" "time" @@ -43,7 +44,7 @@ type mgmtModule struct { useDefaultNUMA bool numaGetter hardware.ProcessNUMAProvider - provider string + providerIdx uint devClassGetter hardware.NetDevClassProvider devStateGetter hardware.NetDevStateProvider fabricScanner *hardware.FabricScanner @@ -169,17 +170,11 @@ func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, req *mgm return nil, err } - mod.log.Debugf("full GetAttachInfoResp: %+v", rawResp) - - reqProviders := mod.getInterfaceProviders(req.Interface, req.Domain) - - resp, err := mod.selectAttachInfo(rawResp, reqProviders) + resp, err := mod.selectAttachInfo(ctx, rawResp, req.Interface, req.Domain) if err != nil { return nil, err } - mod.log.Debugf("provider idx: %d", resp.ClientNetHint.ProviderIdx) - // Requested fabric interface/domain 
behave as a simple override. If we weren't able to // validate them, we return them to the user with the understanding that perhaps the user // knows what they're doing. @@ -216,81 +211,102 @@ func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, numaNode int, sys return mod.attachInfo.Get(ctx, numaNode, sys, mod.getAttachInfoRemote) } -func (mod *mgmtModule) getInterfaceProviders(iface, domain string) common.StringSet { +func (mod *mgmtModule) selectAttachInfo(ctx context.Context, srvResp *mgmtpb.GetAttachInfoResp, iface, domain string) (*mgmtpb.GetAttachInfoResp, error) { + reqProviders := mod.getIfaceProviders(ctx, iface, domain) + mod.log.Debugf("requested interface %q (domain: %q) supports providers: %s", iface, domain, strings.Join(reqProviders.ToSlice(), ", ")) + + if mod.providerIdx > 0 { + // Secondary provider indices begin at 1 + resp, err := mod.selectSecondaryAttachInfo(srvResp, mod.providerIdx) + if err != nil { + return nil, err + } + + if len(reqProviders) != 0 && !reqProviders.Has(resp.ClientNetHint.Provider) { + mod.log.Errorf("requested fabric interface %q (domain: %q) does not report support for configured provider %q (idx %d)", + iface, domain, resp.ClientNetHint.Provider, mod.providerIdx) + } + + return resp, nil + } + + if len(reqProviders) == 0 || reqProviders.Has(srvResp.ClientNetHint.Provider) { + return srvResp, nil + } + + mod.log.Debugf("primary provider is not supported by requested interface %q domain %q (supports: %s)", iface, domain, strings.Join(reqProviders.ToSlice(), ", ")) + + // We can try to be smart about choosing a provider if the client requested a specific interface + for _, hint := range srvResp.SecondaryClientNetHints { + if reqProviders.Has(hint.Provider) { + mod.log.Debugf("found secondary provider supported by requested interface: %q (idx %d)", hint.Provider, hint.ProviderIdx) + return mod.selectSecondaryAttachInfo(srvResp, uint(hint.ProviderIdx)) + } + } + + mod.log.Errorf("no supported provider for requested 
interface %q domain %q, using primary by default", iface, domain) + return srvResp, nil +} + +func (mod *mgmtModule) getIfaceProviders(ctx context.Context, iface, domain string) common.StringSet { + providers := common.NewStringSet() if iface == "" { - return nil + return providers } if domain == "" { domain = iface } - fis, err := mod.fabricInfo.localNUMAFabric.FindDevice(&FabricIfaceParams{ + if fis, err := mod.getFabricInterface(ctx, &FabricIfaceParams{ Interface: iface, Domain: domain, - }) - if err != nil { - mod.log.Errorf("client-requested fabric interface/domain not detected: %s", err.Error()) - mod.log.Error("communications on this interface may fail") - return nil + }); err != nil { + mod.log.Errorf("requested fabric interface %q (domain %q) may not function as desired: %s", iface, domain, err) + } else { + providers.Add(fis.Providers()...) } - providers := common.NewStringSet() - for _, fi := range fis { - providers.AddUnique(fi.Providers()...) - } return providers } -func (mod *mgmtModule) selectAttachInfo(srvResp *mgmtpb.GetAttachInfoResp, reqProviders common.StringSet) (*mgmtpb.GetAttachInfoResp, error) { - providers := reqProviders - if mod.provider != "" { - if len(reqProviders) > 0 && !reqProviders.Has(mod.provider) { - mod.log.Errorf("configured provider %q not included in requested interface's detected providers: %s", reqProviders) - mod.log.Error("communications on this interface may fail") - } - providers = common.NewStringSet(mod.provider) - - mod.log.Debugf("using configured provider: %s", mod.provider) +func (mod *mgmtModule) selectSecondaryAttachInfo(srvResp *mgmtpb.GetAttachInfoResp, provIdx uint) (*mgmtpb.GetAttachInfoResp, error) { + if provIdx == 0 { + return nil, errors.New("provider index 0 is not a secondary provider") } - - if len(providers) == 0 { - return srvResp, nil + maxIdx := len(srvResp.SecondaryClientNetHints) + if int(provIdx) > maxIdx { + return nil, errors.Errorf("provider index %d out of range (maximum: %d)", provIdx, maxIdx) } - if
providers.Has(srvResp.ClientNetHint.Provider) { - return srvResp, nil + hint := srvResp.SecondaryClientNetHints[provIdx-1] + if hint.ProviderIdx != uint32(provIdx) { + return nil, errors.Errorf("malformed network hint: expected provider index %d, got %d", provIdx, hint.ProviderIdx) } - - for _, hint := range srvResp.SecondaryClientNetHints { - if providers.Has(hint.Provider) { - mod.log.Debugf("getting secondary provider %s URIs", hint.Provider) - uris, err := mod.getProviderURIs(srvResp, hint.Provider) - if err == nil { - return &mgmtpb.GetAttachInfoResp{ - Status: srvResp.Status, - RankUris: uris, - MsRanks: srvResp.MsRanks, - ClientNetHint: hint, - }, nil - } - mod.log.Error(err.Error()) - } + mod.log.Debugf("getting secondary provider %s URIs", hint.Provider) + uris, err := mod.getProviderIdxURIs(srvResp, provIdx) + if err != nil { + return nil, err } - return nil, errors.Errorf("no valid connection information for providers: %s", providers) + return &mgmtpb.GetAttachInfoResp{ + Status: srvResp.Status, + RankUris: uris, + MsRanks: srvResp.MsRanks, + ClientNetHint: hint, + }, nil } -func (mod *mgmtModule) getProviderURIs(srvResp *mgmtpb.GetAttachInfoResp, provider string) ([]*mgmtpb.GetAttachInfoResp_RankUri, error) { +func (mod *mgmtModule) getProviderIdxURIs(srvResp *mgmtpb.GetAttachInfoResp, idx uint) ([]*mgmtpb.GetAttachInfoResp_RankUri, error) { uris := []*mgmtpb.GetAttachInfoResp_RankUri{} for _, uri := range srvResp.SecondaryRankUris { - if uri.Provider == provider { + if uri.ProviderIdx == uint32(idx) { uris = append(uris, uri) } } if len(uris) == 0 { - return nil, errors.Errorf("no rank URIs for provider %q", provider) + return nil, errors.Errorf("no rank URIs for provider idx %d", idx) } return uris, nil diff --git a/src/control/cmd/daos_agent/mgmt_rpc_test.go b/src/control/cmd/daos_agent/mgmt_rpc_test.go index b9ea8bd1346..8bffce61f88 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc_test.go +++ b/src/control/cmd/daos_agent/mgmt_rpc_test.go
@@ -44,51 +44,48 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { return &mgmtpb.GetAttachInfoResp{ RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: 0, - Uri: "uri0", - Provider: "ofi+verbs", + Rank: 0, + Uri: "uri0", }, { - Rank: 1, - Uri: "uri1", - Provider: "ofi+verbs", + Rank: 1, + Uri: "uri1", }, { - Rank: 3, - Uri: "uri3", - Provider: "ofi+verbs", + Rank: 3, + Uri: "uri3", }, }, SecondaryRankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: 0, - Uri: "uri4-sec", - Provider: "ofi+sockets", + Rank: 0, + Uri: "uri4-sec", + ProviderIdx: 2, }, { - Rank: 1, - Uri: "uri5-sec", - Provider: "ofi+sockets", + Rank: 1, + Uri: "uri5-sec", + ProviderIdx: 2, }, { - Rank: 3, - Uri: "uri6-sec", - Provider: "ofi+sockets", + Rank: 3, + Uri: "uri6-sec", + ProviderIdx: 2, }, { - Rank: 0, - Uri: "uri0-sec", - Provider: "ofi+tcp", + Rank: 0, + Uri: "uri0-sec", + ProviderIdx: 1, }, { - Rank: 1, - Uri: "uri1-sec", - Provider: "ofi+tcp", + Rank: 1, + Uri: "uri1-sec", + ProviderIdx: 1, }, { - Rank: 3, - Uri: "uri3-sec", - Provider: "ofi+tcp", + Rank: 3, + Uri: "uri3-sec", + ProviderIdx: 1, }, }, MsRanks: []uint32{0, 1, 3}, @@ -100,6 +97,12 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { { Provider: "ofi+tcp", NetDevClass: uint32(hardware.Infiniband), + ProviderIdx: 1, + }, + { + Provider: "badidx", + NetDevClass: uint32(hardware.Ether), + ProviderIdx: 0, // bad for secondary }, }, } @@ -117,19 +120,19 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { return &mgmtpb.GetAttachInfoResp{ RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: 0, - Uri: "uri0-sec", - Provider: "ofi+tcp", + Rank: 0, + Uri: "uri0-sec", + ProviderIdx: 1, }, { - Rank: 1, - Uri: "uri1-sec", - Provider: "ofi+tcp", + Rank: 1, + Uri: "uri1-sec", + ProviderIdx: 1, }, { - Rank: 3, - Uri: "uri3-sec", - Provider: "ofi+tcp", + Rank: 3, + Uri: "uri3-sec", + ProviderIdx: 1, }, }, MsRanks: []uint32{0, 1, 3}, @@ -138,18 +141,19 @@ func TestAgent_mgmtModule_getAttachInfo(t 
*testing.T) { NetDevClass: uint32(hardware.Infiniband), Interface: fi, Domain: domain, + ProviderIdx: 1, }, } } for name, tc := range map[string]struct { - reqIface string - reqDomain string - provider string - numaNode int - rpcResp *control.HostResponse - expResp *mgmtpb.GetAttachInfoResp - expErr error + reqIface string + reqDomain string + providerIdx uint + numaNode int + rpcResp *control.HostResponse + expResp *mgmtpb.GetAttachInfoResp + expErr error }{ "RPC error": { rpcResp: &control.HostResponse{ @@ -162,9 +166,8 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { Message: &mgmtpb.GetAttachInfoResp{ RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: 0, - Uri: "uri0", - Provider: "ofi+verbs", + Rank: 0, + Uri: "uri0", }, }, MsRanks: []uint32{0}, @@ -180,9 +183,8 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { Message: &mgmtpb.GetAttachInfoResp{ RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: 0, - Uri: "uri0", - Provider: "notreal", + Rank: 0, + Uri: "uri0", }, }, MsRanks: []uint32{0}, @@ -194,22 +196,14 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { }, expErr: errors.New("no suitable fabric interface"), }, - "basic success": { - - rpcResp: &control.HostResponse{ - Message: testSrvResp(), - }, - expResp: priResp("fi0", "d0"), - }, "primary provider": { - provider: "ofi+verbs", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, expResp: priResp("fi0", "d0"), }, "secondary provider": { - provider: "ofi+tcp", + providerIdx: 1, rpcResp: &control.HostResponse{ Message: testSrvResp(), }, @@ -218,16 +212,15 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { "client req iface and domain": { reqIface: "fi1", reqDomain: "d1", - provider: "ofi+verbs", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, expResp: priResp("fi1", "d1"), }, "client req secondary provider": { - reqIface: "fi1", - reqDomain: "fi1", - provider: "ofi+tcp", + reqIface: "fi1", + reqDomain: "fi1", + providerIdx: 1, rpcResp: 
&control.HostResponse{ Message: testSrvResp(), }, @@ -250,16 +243,15 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { }, "client req domain-only ignored": { reqDomain: "d2", - provider: "ofi+verbs", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, expResp: priResp("fi0", "d0"), }, "client req provider mismatch ignored": { - reqIface: "fi1", - reqDomain: "d1", - provider: "ofi+tcp", + reqIface: "fi1", + reqDomain: "d1", + providerIdx: 1, rpcResp: &control.HostResponse{ Message: testSrvResp(), }, @@ -268,7 +260,6 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { "client req iface/domain mismatch ignored": { reqIface: "fi0", reqDomain: "d2", - provider: "ofi+verbs", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, @@ -276,25 +267,31 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { }, "client req iface not found ignored": { reqIface: "notreal", - provider: "ofi+verbs", rpcResp: &control.HostResponse{ Message: testSrvResp(), }, expResp: priResp("notreal", "notreal"), }, - "config provider not found": { - provider: "notreal", + "client req iface idx malformed": { + reqIface: "bad1", + rpcResp: &control.HostResponse{ + Message: testSrvResp(), + }, + expErr: errors.New("not a secondary provider"), + }, + "config provider idx out of range": { + providerIdx: 5, rpcResp: &control.HostResponse{ Message: testSrvResp(), }, - expErr: errors.New("no valid connection information"), + expErr: errors.New("out of range"), }, - "config provider hint missing": { - provider: "ofi+sockets", + "malformed hint at sec provider idx": { + providerIdx: 2, rpcResp: &control.HostResponse{ Message: testSrvResp(), }, - expErr: errors.New("no valid connection information"), + expErr: errors.New("provider index"), }, } { t.Run(name, func(t *testing.T) { @@ -337,6 +334,13 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { Providers: common.NewStringSet("ofi+tcp"), }, }, + { + Name: "bad1", + NetDevClass: hardware.Ether, + hw: 
&hardware.FabricInterface{ + Providers: common.NewStringSet("badidx"), + }, + }, }, }, } @@ -353,7 +357,7 @@ func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { Responses: []*control.HostResponse{tc.rpcResp}, }, }), - provider: tc.provider, + providerIdx: tc.providerIdx, } resp, err := mod.getAttachInfo(context.Background(), tc.numaNode, diff --git a/src/control/cmd/daos_agent/start.go b/src/control/cmd/daos_agent/start.go index 70cdc19e579..a372ed7a5f2 100644 --- a/src/control/cmd/daos_agent/start.go +++ b/src/control/cmd/daos_agent/start.go @@ -100,7 +100,7 @@ func (cmd *startCmd) Execute(_ []string) error { devClassGetter: hwprov.DefaultNetDevClassProvider(cmd.Logger), devStateGetter: hwprov.DefaultNetDevStateProvider(cmd.Logger), monitor: procmon, - provider: cmd.cfg.Provider, + providerIdx: cmd.cfg.ProviderIdx, }) // Cache hwloc data in context on startup, since it'll be used extensively at runtime. diff --git a/src/control/cmd/dmg/pretty/system.go b/src/control/cmd/dmg/pretty/system.go index f9212939314..6de40571e28 100644 --- a/src/control/cmd/dmg/pretty/system.go +++ b/src/control/cmd/dmg/pretty/system.go @@ -125,13 +125,15 @@ func PrintSystemQueryResponse(out, outErr io.Writer, resp *control.SystemQueryRe return err } printAbsentHosts(outErr, &resp.AbsentHosts) - + printSystemProviders(out, resp.Providers) return nil } printAbsentHosts(outErr, &resp.AbsentHosts) printAbsentRanks(outErr, &resp.AbsentRanks) + printSystemProviders(out, resp.Providers) + return nil } @@ -169,6 +171,26 @@ func printSystemResults(out, outErr io.Writer, results system.MemberResults, abs return nil } +func printSystemProviders(out io.Writer, providers []string) { + if len(providers) == 0 { + return + } + + idxKey := "Idx" + provKey := "Provider" + formatter := txtfmt.NewTableFormatter(idxKey, provKey) + + var table []txtfmt.TableRow + for i, prov := range providers { + table = append(table, txtfmt.TableRow{ + idxKey: fmt.Sprintf("%d", i), + provKey: prov, + }) + } + + 
fmt.Fprint(out, formatter.Format(table)) +} + // PrintSystemStartResponse generates a human-readable representation of the // supplied SystemStartResp struct and writes it to the supplied io.Writer. func PrintSystemStartResponse(out, outErr io.Writer, resp *control.SystemStartResp) error { diff --git a/src/control/cmd/dmg/pretty/system_test.go b/src/control/cmd/dmg/pretty/system_test.go index b417575f19b..775dc2d7b31 100644 --- a/src/control/cmd/dmg/pretty/system_test.go +++ b/src/control/cmd/dmg/pretty/system_test.go @@ -313,6 +313,28 @@ Rank UUID Control Address Fault Domain State Unknown 3 hosts: foo[7-9] Unknown 3 ranks: 7-9 +`, + }, + "providers": { + resp: &control.SystemQueryResp{ + Members: Members{ + MockMember(t, 0, MemberStateJoined), + MockMember(t, 1, MemberStateJoined), + MockMember(t, 2, MemberStateStopped), + }, + Providers: []string{"ofi+verbs", "ofi+tcp", "ofi+tcp"}, + }, + expPrintStr: ` +Rank State +---- ----- +[0-1] Joined +2 Stopped + +Idx Provider +--- -------- +0 ofi+verbs +1 ofi+tcp +2 ofi+tcp `, }, } { diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 1a719e80034..12e335867cf 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -227,16 +227,17 @@ type JoinReq struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system name. - Uuid string `protobuf:"bytes,2,opt,name=uuid,proto3" json:"uuid,omitempty"` // Server UUID. - Rank uint32 `protobuf:"varint,3,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank desired, if not MAX_UINT32. - Uri string `protobuf:"bytes,4,opt,name=uri,proto3" json:"uri,omitempty"` // Server CaRT primary provider URI (i.e., for context 0). - Nctxs uint32 `protobuf:"varint,5,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Server CaRT context count. 
- Addr string `protobuf:"bytes,6,opt,name=addr,proto3" json:"addr,omitempty"` // Server management address. - SrvFaultDomain string `protobuf:"bytes,7,opt,name=srvFaultDomain,proto3" json:"srvFaultDomain,omitempty"` // Fault domain for this instance's server - Idx uint32 `protobuf:"varint,8,opt,name=idx,proto3" json:"idx,omitempty"` // Instance index on server node. - Incarnation uint64 `protobuf:"varint,9,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // rank incarnation - SecondaryUris []string `protobuf:"bytes,10,rep,name=secondary_uris,json=secondaryUris,proto3" json:"secondary_uris,omitempty"` // URIs for any secondary providers + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system name. + Uuid string `protobuf:"bytes,2,opt,name=uuid,proto3" json:"uuid,omitempty"` // Server UUID. + Rank uint32 `protobuf:"varint,3,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank desired, if not MAX_UINT32. + Uri string `protobuf:"bytes,4,opt,name=uri,proto3" json:"uri,omitempty"` // Server CaRT primary provider URI (i.e., for context 0). + Nctxs uint32 `protobuf:"varint,5,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Server CaRT context count. + Addr string `protobuf:"bytes,6,opt,name=addr,proto3" json:"addr,omitempty"` // Server management address. + SrvFaultDomain string `protobuf:"bytes,7,opt,name=srvFaultDomain,proto3" json:"srvFaultDomain,omitempty"` // Fault domain for this instance's server + Idx uint32 `protobuf:"varint,8,opt,name=idx,proto3" json:"idx,omitempty"` // Instance index on server node. 
+ Incarnation uint64 `protobuf:"varint,9,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // rank incarnation + SecondaryUris []string `protobuf:"bytes,10,rep,name=secondary_uris,json=secondaryUris,proto3" json:"secondary_uris,omitempty"` // URIs for any secondary providers + SecondaryNctxs []uint32 `protobuf:"varint,11,rep,packed,name=secondary_nctxs,json=secondaryNctxs,proto3" json:"secondary_nctxs,omitempty"` // CaRT context count for each secondary provider } func (x *JoinReq) Reset() { @@ -341,6 +342,13 @@ func (x *JoinReq) GetSecondaryUris() []string { return nil } +func (x *JoinReq) GetSecondaryNctxs() []uint32 { + if x != nil { + return x.SecondaryNctxs + } + return nil +} + type JoinResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1058,9 +1066,9 @@ type GetAttachInfoResp_RankUri struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Rank uint32 `protobuf:"varint,1,opt,name=rank,proto3" json:"rank,omitempty"` - Uri string `protobuf:"bytes,2,opt,name=uri,proto3" json:"uri,omitempty"` - Provider string `protobuf:"bytes,3,opt,name=provider,proto3" json:"provider,omitempty"` + Rank uint32 `protobuf:"varint,1,opt,name=rank,proto3" json:"rank,omitempty"` + Uri string `protobuf:"bytes,2,opt,name=uri,proto3" json:"uri,omitempty"` + ProviderIdx uint32 `protobuf:"varint,3,opt,name=provider_idx,json=providerIdx,proto3" json:"provider_idx,omitempty"` } func (x *GetAttachInfoResp_RankUri) Reset() { @@ -1109,11 +1117,11 @@ func (x *GetAttachInfoResp_RankUri) GetUri() string { return "" } -func (x *GetAttachInfoResp_RankUri) GetProvider() string { +func (x *GetAttachInfoResp_RankUri) GetProviderIdx() uint32 { if x != nil { - return x.Provider + return x.ProviderIdx } - return "" + return 0 } var File_mgmt_svc_proto protoreflect.FileDescriptor @@ -1135,7 +1143,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x29, 0x0a, 0x0f, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x55, 0x70, 0x64, 0x61, 
0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x22, 0x82, 0x02, 0x0a, 0x07, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, + 0x22, 0xab, 0x02, 0x0a, 0x07, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, @@ -1151,79 +1159,82 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x0a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, - 0x79, 0x55, 0x72, 0x69, 0x73, 0x22, 0xbc, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, - 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, - 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, - 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, - 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, - 
0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, - 0x55, 0x54, 0x10, 0x01, 0x22, 0x22, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, - 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x53, 0x0a, 0x0f, 0x4c, 0x65, 0x61, 0x64, - 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x24, 0x0a, 0x0d, 0x63, - 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, - 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x77, 0x0a, - 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, - 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, - 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, - 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, 0x96, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, - 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, - 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, - 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 
0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, - 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, 0x72, - 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, 0x72, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, 0x68, - 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, 0x74, - 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, 0x72, - 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, 0x5f, - 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, 0x0b, - 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x21, 0x0a, 0x0c, - 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x0b, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, 0x78, 0x22, - 0xb1, 0x03, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, - 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, - 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 
0x6e, 0x6b, 0x55, 0x72, - 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, - 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, - 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, - 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, - 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, - 0x69, 0x6e, 0x74, 0x12, 0x4f, 0x0a, 0x13, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, - 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, - 0x69, 0x52, 0x11, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, - 0x55, 0x72, 0x69, 0x73, 0x12, 0x50, 0x0a, 0x1a, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, - 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, - 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, - 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, - 0x74, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x1a, 0x4b, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, - 0x69, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, - 0x64, 0x65, 
0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, - 0x64, 0x65, 0x72, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, + 0x79, 0x55, 0x72, 0x69, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, + 0x72, 0x79, 0x5f, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x18, 0x0b, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0e, + 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, 0x74, 0x78, 0x73, 0x22, 0xbc, + 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4a, 0x6f, + 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, + 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, + 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, + 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, 0x6f, + 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, + 0x6f, 0x69, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x06, 0x0a, 0x02, + 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x22, 0x22, 0x0a, + 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, + 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, + 0x73, 0x22, 0x53, 0x0a, 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x52, 0x65, 0x73, 0x70, 0x12, 0x24, 0x0a, 0x0d, 
0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, + 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, + 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, + 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, + 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x77, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, + 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, + 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, + 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, + 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, + 0x96, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, + 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, + 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, + 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, + 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, + 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 
0x64, 0x72, + 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, + 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, + 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, + 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, + 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, + 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, + 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, + 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x70, 0x72, 0x6f, + 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, 0x78, 0x22, 0xb8, 0x03, 0x0a, 0x11, 0x47, 0x65, 0x74, + 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, + 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, + 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, + 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, + 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, + 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, + 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, + 0x6c, 0x69, 0x65, 0x6e, 
0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x4f, 0x0a, 0x13, + 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, + 0x72, 0x69, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, + 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x11, 0x73, 0x65, 0x63, 0x6f, + 0x6e, 0x64, 0x61, 0x72, 0x79, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x50, 0x0a, + 0x1a, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, + 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, + 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, + 0x79, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x1a, + 0x52, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, + 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, + 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, + 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, + 0x49, 0x64, 0x78, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, diff --git a/src/control/common/proto/mgmt/system.pb.go 
b/src/control/common/proto/mgmt/system.pb.go index 6a93a3bd197..6893a5a3558 100644 --- a/src/control/common/proto/mgmt/system.pb.go +++ b/src/control/common/proto/mgmt/system.pb.go @@ -6,8 +6,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.26.0 -// protoc v3.6.1 +// protoc-gen-go v1.28.0 +// protoc v3.5.0 // source: mgmt/system.proto package mgmt @@ -512,6 +512,7 @@ type SystemQueryResp struct { Members []*SystemMember `protobuf:"bytes,1,rep,name=members,proto3" json:"members,omitempty"` Absentranks string `protobuf:"bytes,2,opt,name=absentranks,proto3" json:"absentranks,omitempty"` // rankset missing from membership Absenthosts string `protobuf:"bytes,3,opt,name=absenthosts,proto3" json:"absenthosts,omitempty"` // hostset missing from membership + Providers []string `protobuf:"bytes,4,rep,name=providers,proto3" json:"providers,omitempty"` // Providers supported by system in configured order } func (x *SystemQueryResp) Reset() { @@ -567,6 +568,13 @@ func (x *SystemQueryResp) GetAbsenthosts() string { return "" } +func (x *SystemQueryResp) GetProviders() []string { + if x != nil { + return x.Providers + } + return nil +} + // SystemEraseReq supplies system erase parameters. 
type SystemEraseReq struct { state protoimpl.MessageState @@ -1222,7 +1230,7 @@ var file_mgmt_system_proto_rawDesc = []byte{ 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, - 0x83, 0x01, 0x0a, 0x0f, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0xa1, 0x01, 0x0a, 0x0f, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x52, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, @@ -1230,82 +1238,84 @@ var file_mgmt_system_proto_rawDesc = []byte{ 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x62, 0x73, 0x65, 0x6e, 0x74, - 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x22, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, - 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x3f, 0x0a, 0x0f, 0x53, 0x79, 0x73, - 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, - 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, - 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x3e, 0x0a, 0x10, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 
0x75, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, + 0x68, 0x6f, 0x73, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, + 0x72, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x09, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, + 0x65, 0x72, 0x73, 0x22, 0x22, 0x0a, 0x0e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x45, 0x72, 0x61, + 0x73, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x3f, 0x0a, 0x0f, 0x53, 0x79, 0x73, 0x74, 0x65, + 0x6d, 0x45, 0x72, 0x61, 0x73, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x72, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x73, 0x68, + 0x61, 0x72, 0x65, 0x64, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, + 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x3e, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, + 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, + 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x18, + 0x0a, 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0xbe, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, + 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x3f, + 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, + 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, + 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x1a, + 0x68, 0x0a, 0x0d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 
0x05, + 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x73, 0x67, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6d, 0x73, 0x67, 0x12, 0x17, 0x0a, 0x07, 0x70, 0x6f, + 0x6f, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, 0x6f, 0x6f, + 0x6c, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x0d, 0x52, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0xab, 0x01, 0x0a, 0x10, 0x53, 0x79, + 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, - 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0xbe, 0x01, 0x0a, 0x11, 0x53, - 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, - 0x12, 0x3f, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x43, - 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6c, 0x65, 0x61, 0x6e, - 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, - 0x73, 0x1a, 0x68, 0x0a, 0x0d, 0x43, 0x6c, 0x65, 0x61, 0x6e, 0x75, 0x70, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x73, - 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6d, 0x73, 0x67, 0x12, 0x17, 0x0a, 0x07, - 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, - 0x6f, 0x6f, 0x6c, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x0d, 0x52, 
0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0xab, 0x01, 0x0a, 0x10, - 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, - 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, - 0x79, 0x73, 0x12, 0x46, 0x0a, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x2e, 0x41, - 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, - 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x41, 0x74, - 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, - 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x38, 0x0a, 0x10, 0x53, 0x79, 0x73, - 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, - 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x6b, - 0x65, 0x79, 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, - 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x73, 0x70, 0x12, 0x47, 0x0a, 0x0a, 0x61, 0x74, 0x74, - 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, - 0x74, 0x72, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, - 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x61, 0x74, 0x74, 
0x72, 0x69, 0x62, 0x75, 0x74, - 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x22, 0xab, 0x01, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, - 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x46, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, - 0x70, 0x52, 0x65, 0x71, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, - 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, - 0x1a, 0x3d, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, + 0x12, 0x46, 0x0a, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, + 0x65, 0x6d, 0x53, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x2e, 0x41, 0x74, 0x74, + 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x61, 0x74, + 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x41, 0x74, 0x74, 0x72, + 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, + 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, + 0x6c, 
0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x38, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, + 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, + 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x65, 0x79, + 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, + 0x74, 0x74, 0x72, 0x52, 0x65, 0x73, 0x70, 0x12, 0x47, 0x0a, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, + 0x62, 0x75, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x72, + 0x52, 0x65, 0x73, 0x70, 0x2e, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, + 0x1a, 0x3d, 0x0a, 0x0f, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, - 0x38, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, - 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x11, 0x53, 0x79, - 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x47, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x6d, 0x67, 
0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, - 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x50, 0x72, 0x6f, - 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, - 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, - 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, - 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0xab, 0x01, 0x0a, 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, + 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x46, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, + 0x65, 0x71, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x1a, 0x3d, + 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x6b, 0x65, 0x79, 
0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x38, 0x0a, + 0x10, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x11, 0x53, 0x79, 0x73, 0x74, + 0x65, 0x6d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x47, 0x0a, + 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x27, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x47, + 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, + 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x1a, 0x3d, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, + 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, + 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, + 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/proto/srv/srv.pb.go b/src/control/common/proto/srv/srv.pb.go 
index 0ec4d271ba1..c4ed89eb1a4 100644 --- a/src/control/common/proto/srv/srv.pb.go +++ b/src/control/common/proto/srv/srv.pb.go @@ -33,13 +33,14 @@ type NotifyReadyReq struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Uri string `protobuf:"bytes,1,opt,name=uri,proto3" json:"uri,omitempty"` // Primary CaRT URI - Nctxs uint32 `protobuf:"varint,2,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Number of primary CaRT contexts - DrpcListenerSock string `protobuf:"bytes,3,opt,name=drpcListenerSock,proto3" json:"drpcListenerSock,omitempty"` // Path to I/O Engine's dRPC listener socket - InstanceIdx uint32 `protobuf:"varint,4,opt,name=instanceIdx,proto3" json:"instanceIdx,omitempty"` // I/O Engine instance index - Ntgts uint32 `protobuf:"varint,5,opt,name=ntgts,proto3" json:"ntgts,omitempty"` // number of VOS targets allocated in I/O Engine - Incarnation uint64 `protobuf:"varint,6,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // HLC incarnation number - SecondaryUris []string `protobuf:"bytes,7,rep,name=secondaryUris,proto3" json:"secondaryUris,omitempty"` // secondary CaRT URIs + Uri string `protobuf:"bytes,1,opt,name=uri,proto3" json:"uri,omitempty"` // Primary CaRT URI + Nctxs uint32 `protobuf:"varint,2,opt,name=nctxs,proto3" json:"nctxs,omitempty"` // Number of primary CaRT contexts + DrpcListenerSock string `protobuf:"bytes,3,opt,name=drpcListenerSock,proto3" json:"drpcListenerSock,omitempty"` // Path to I/O Engine's dRPC listener socket + InstanceIdx uint32 `protobuf:"varint,4,opt,name=instanceIdx,proto3" json:"instanceIdx,omitempty"` // I/O Engine instance index + Ntgts uint32 `protobuf:"varint,5,opt,name=ntgts,proto3" json:"ntgts,omitempty"` // number of VOS targets allocated in I/O Engine + Incarnation uint64 `protobuf:"varint,6,opt,name=incarnation,proto3" json:"incarnation,omitempty"` // HLC incarnation number + SecondaryUris []string `protobuf:"bytes,7,rep,name=secondaryUris,proto3" json:"secondaryUris,omitempty"` // 
secondary CaRT URIs + SecondaryNctxs []uint32 `protobuf:"varint,8,rep,packed,name=secondaryNctxs,proto3" json:"secondaryNctxs,omitempty"` // number of CaRT contexts for each secondary provider } func (x *NotifyReadyReq) Reset() { @@ -123,6 +124,13 @@ func (x *NotifyReadyReq) GetSecondaryUris() []string { return nil } +func (x *NotifyReadyReq) GetSecondaryNctxs() []uint32 { + if x != nil { + return x.SecondaryNctxs + } + return nil +} + type BioErrorReq struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -434,7 +442,7 @@ var File_srv_srv_proto protoreflect.FileDescriptor var file_srv_srv_proto_rawDesc = []byte{ 0x0a, 0x0d, 0x73, 0x72, 0x76, 0x2f, 0x73, 0x72, 0x76, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x03, 0x73, 0x72, 0x76, 0x22, 0xe4, 0x01, 0x0a, 0x0e, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x52, + 0x03, 0x73, 0x72, 0x76, 0x22, 0x8c, 0x02, 0x0a, 0x0e, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x52, 0x65, 0x61, 0x64, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6e, 0x63, 0x74, 0x78, 0x73, 0x12, @@ -448,40 +456,42 @@ var file_srv_srv_proto_rawDesc = []byte{ 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x65, - 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x22, 0xd5, 0x01, 0x0a, 0x0b, - 0x42, 0x69, 0x6f, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x1a, 0x0a, 0x08, 0x75, - 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x75, - 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, - 0x72, 0x72, 0x18, 0x02, 0x20, 0x01, 
0x28, 0x08, 0x52, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, 0x72, - 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x77, 0x72, 0x69, 0x74, 0x65, 0x45, 0x72, 0x72, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x08, 0x77, 0x72, 0x69, 0x74, 0x65, 0x45, 0x72, 0x72, 0x12, 0x14, 0x0a, - 0x05, 0x74, 0x67, 0x74, 0x49, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x74, 0x67, - 0x74, 0x49, 0x64, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, - 0x64, 0x78, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, - 0x63, 0x65, 0x49, 0x64, 0x78, 0x12, 0x2a, 0x0a, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, - 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x55, 0x72, 0x69, 0x73, 0x12, 0x26, 0x0a, 0x0e, 0x73, + 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, 0x74, 0x78, 0x73, 0x18, 0x08, 0x20, + 0x03, 0x28, 0x0d, 0x52, 0x0e, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x4e, 0x63, + 0x74, 0x78, 0x73, 0x22, 0xd5, 0x01, 0x0a, 0x0b, 0x42, 0x69, 0x6f, 0x45, 0x72, 0x72, 0x6f, 0x72, + 0x52, 0x65, 0x71, 0x12, 0x1a, 0x0a, 0x08, 0x75, 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, 0x72, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x75, 0x6e, 0x6d, 0x61, 0x70, 0x45, 0x72, 0x72, 0x12, + 0x18, 0x0a, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, 0x72, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x07, 0x72, 0x65, 0x61, 0x64, 0x45, 0x72, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x77, 0x72, 0x69, + 0x74, 0x65, 0x45, 0x72, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x77, 0x72, 0x69, + 0x74, 0x65, 0x45, 0x72, 0x72, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x67, 0x74, 0x49, 0x64, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x74, 0x67, 0x74, 0x49, 0x64, 0x12, 0x20, 0x0a, 0x0b, 0x69, + 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 0x78, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0b, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x49, 0x64, 
0x78, 0x12, 0x2a, 0x0a, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, - 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x75, 0x72, 0x69, 0x22, 0x23, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, - 0x63, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0x42, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x50, - 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x22, 0x2a, 0x0a, 0x12, - 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, - 0x65, 0x71, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x22, 0x5b, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, - 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x73, - 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, - 0x63, 0x72, 0x65, 0x70, 0x73, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, - 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, - 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, - 0x63, 0x6f, 
0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x72, 0x76, - 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x64, 0x72, 0x70, 0x63, 0x4c, 0x69, 0x73, + 0x74, 0x65, 0x6e, 0x65, 0x72, 0x53, 0x6f, 0x63, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x23, 0x0a, 0x0d, 0x47, + 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, + 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, + 0x22, 0x42, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x76, 0x63, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, + 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, + 0x72, 0x65, 0x70, 0x73, 0x22, 0x2a, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, 0x6e, 0x64, + 0x42, 0x79, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x65, 0x71, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x22, 0x5b, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x46, 0x69, 0x6e, 0x64, 0x42, 0x79, 0x4c, 0x61, + 0x62, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, + 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, + 0x75, 0x69, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x72, 0x65, 0x70, 0x73, 0x42, 0x39, 0x5a, + 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, + 
0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, + 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x72, 0x76, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/control/network.go b/src/control/lib/control/network.go index 3d63f435cd4..140db0ada4c 100644 --- a/src/control/lib/control/network.go +++ b/src/control/lib/control/network.go @@ -207,9 +207,9 @@ type ( // PrimaryServiceRank provides a rank->uri mapping for a DAOS // Primary Service Rank (PSR). PrimaryServiceRank struct { - Rank uint32 - Uri string - Provider string + Rank uint32 + Uri string + ProviderIdx uint32 `json:"provider_idx"` } ClientNetworkHint struct { diff --git a/src/control/lib/control/system.go b/src/control/lib/control/system.go index b1ff5eff6c6..9c7fcd68b33 100644 --- a/src/control/lib/control/system.go +++ b/src/control/lib/control/system.go @@ -99,15 +99,16 @@ type SystemJoinReq struct { unaryRequest msRequest retryableRequest - ControlAddr *net.TCPAddr - UUID string - Rank system.Rank - URI string - SecondaryURIs []string `json:"secondary_uris"` - NumContexts uint32 `json:"Nctxs"` - FaultDomain *system.FaultDomain `json:"SrvFaultDomain"` - InstanceIdx uint32 `json:"Idx"` - Incarnation uint64 `json:"Incarnation"` + ControlAddr *net.TCPAddr + UUID string + Rank system.Rank + URI string + SecondaryURIs []string `json:"secondary_uris"` + NumContexts uint32 `json:"Nctxs"` + NumSecondaryContexts []uint32 `json:"secondary_nctxs"` + FaultDomain *system.FaultDomain `json:"SrvFaultDomain"` + InstanceIdx uint32 `json:"Idx"` + Incarnation uint64 `json:"Incarnation"` } // MarshalJSON packs SystemJoinResp struct into a JSON message. @@ -185,7 +186,8 @@ type SystemQueryReq struct { // SystemQueryResp contains the request response. 
type SystemQueryResp struct { sysResponse - Members system.Members `json:"members"` + Members system.Members `json:"members"` + Providers []string `json:"providers"` } // UnmarshalJSON unpacks JSON message into SystemQueryResp struct. diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 32ec0eb5de5..62be28bc12a 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -189,14 +189,15 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify } joinReq := &control.SystemJoinReq{ - UUID: superblock.UUID, - Rank: r, - URI: ready.GetUri(), - SecondaryURIs: ready.GetSecondaryUris(), - NumContexts: ready.GetNctxs(), - FaultDomain: ei.hostFaultDomain, - InstanceIdx: ei.Index(), - Incarnation: ready.GetIncarnation(), + UUID: superblock.UUID, + Rank: r, + URI: ready.GetUri(), + SecondaryURIs: ready.GetSecondaryUris(), + NumContexts: ready.GetNctxs(), + NumSecondaryContexts: ready.GetSecondaryNctxs(), + FaultDomain: ei.hostFaultDomain, + InstanceIdx: ei.Index(), + Incarnation: ready.GetIncarnation(), } resp, err := ei.joinSystem(ctx, joinReq) diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index e71f0c44836..7c8e1e95e71 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -71,15 +71,6 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo } } - getPrimaryProvider := func() string { - return svc.clientNetworkHint[0].Provider - } - - getSecondaryProvider := func(idx int) string { - // Primary is at idx 0, secondary providers start afterward - return svc.clientNetworkHint[idx+1].Provider - } - for rank, uris := range rankURIs { if len(svc.clientNetworkHint) < len(uris.Secondary)+1 { return nil, errors.Errorf("not enough client network hints (%d) for rank %d URIs (%d)", @@ -87,16 +78,15 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo } resp.RankUris = 
append(resp.RankUris, &mgmtpb.GetAttachInfoResp_RankUri{ - Rank: rank.Uint32(), - Uri: uris.Primary, - Provider: getPrimaryProvider(), + Rank: rank.Uint32(), + Uri: uris.Primary, }) for i, uri := range uris.Secondary { rankURI := &mgmtpb.GetAttachInfoResp_RankUri{ - Rank: rank.Uint32(), - Uri: uri, - Provider: getSecondaryProvider(i), + Rank: rank.Uint32(), + Uri: uri, + ProviderIdx: uint32(i + 1), } resp.SecondaryRankUris = append(resp.SecondaryRankUris, rankURI) @@ -695,6 +685,10 @@ func (svc *mgmtSvc) SystemQuery(ctx context.Context, req *mgmtpb.SystemQueryReq) return nil, err } + for _, hint := range svc.clientNetworkHint { + resp.Providers = append(resp.Providers, hint.Provider) + } + svc.log.Debugf("Responding to SystemQuery RPC: %s", mgmtpb.Debug(resp)) return resp, nil diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index a67cfa7a768..06101a58bed 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -107,14 +107,12 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.PrimaryFabricURI, - Provider: "ofi+verbs", + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, }, { - Rank: nonReplica.Rank.Uint32(), - Uri: nonReplica.PrimaryFabricURI, - Provider: "ofi+verbs", + Rank: nonReplica.Rank.Uint32(), + Uri: nonReplica.PrimaryFabricURI, }, }, MsRanks: []uint32{0}, @@ -140,14 +138,12 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.PrimaryFabricURI, - Provider: "ofi+tcp", + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, }, { - Rank: nonReplica.Rank.Uint32(), - Uri: nonReplica.PrimaryFabricURI, - Provider: "ofi+tcp", + Rank: nonReplica.Rank.Uint32(), + Uri: nonReplica.PrimaryFabricURI, }, }, MsRanks: []uint32{0}, @@ -173,9 +169,8 @@ 
func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.PrimaryFabricURI, - Provider: "ofi+tcp", + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, }, }, MsRanks: []uint32{0}, @@ -1044,11 +1039,13 @@ func TestServer_MgmtSvc_SystemQuery(t *testing.T) { emptyDb bool ranks string hosts string + clientNetHints []*mgmtpb.ClientNetHint expMembers []*mgmtpb.SystemMember expRanks string expAbsentHosts string expAbsentRanks string expErrMsg string + expProviders []string }{ "nil req": { nilReq: true, @@ -1161,6 +1158,60 @@ func TestServer_MgmtSvc_SystemQuery(t *testing.T) { emptyDb: true, expErrMsg: system.ErrRaftUnavail.Error(), }, + "use clientNetHint for providers": { + clientNetHints: []*mgmtpb.ClientNetHint{ + { + Provider: "prov1", + }, + { + Provider: "prov2", + }, + { + Provider: "prov3", + }, + }, + expProviders: []string{"prov1", "prov2", "prov3"}, + expMembers: []*mgmtpb.SystemMember{ + { + Rank: 0, Addr: test.MockHostAddr(1).String(), + Uuid: test.MockUUID(0), + State: stateString(system.MemberStateErrored), Info: "couldn't ping", + FaultDomain: "/", + }, + { + Rank: 1, Addr: test.MockHostAddr(1).String(), + Uuid: test.MockUUID(1), + // transition to "ready" illegal + State: stateString(system.MemberStateStopping), + FaultDomain: "/", + }, + { + Rank: 2, Addr: test.MockHostAddr(2).String(), + Uuid: test.MockUUID(2), + State: stateString(system.MemberStateUnresponsive), + FaultDomain: "/", + }, + { + Rank: 3, Addr: test.MockHostAddr(2).String(), + Uuid: test.MockUUID(3), + State: stateString(system.MemberStateJoined), + FaultDomain: "/", + }, + { + Rank: 4, Addr: test.MockHostAddr(3).String(), + Uuid: test.MockUUID(4), + State: stateString(system.MemberStateStarting), + FaultDomain: "/", + }, + { + Rank: 5, Addr: test.MockHostAddr(3).String(), + Uuid: test.MockUUID(5), + State: stateString(system.MemberStateStopped), + FaultDomain: "/", + }, + }, + 
expRanks: "0-5", + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) @@ -1178,6 +1229,7 @@ func TestServer_MgmtSvc_SystemQuery(t *testing.T) { svc := newTestMgmtSvc(t, log) svc.membership = svc.membership.WithTCPResolver(mockResolver) + svc.clientNetworkHint = tc.clientNetHints ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) defer cancel() @@ -1212,13 +1264,17 @@ func TestServer_MgmtSvc_SystemQuery(t *testing.T) { } cmpOpts := append(test.DefaultCmpOpts(), - protocmp.IgnoreFields(&mgmtpb.SystemMember{}, "last_update"), + protocmp.IgnoreFields(&mgmtpb.SystemMember{}, + "last_update", "fault_domain", "fabric_uri", "fabric_contexts", "incarnation"), ) if diff := cmp.Diff(tc.expMembers, gotResp.Members, cmpOpts...); diff != "" { - t.Logf("unexpected results (-want, +got)\n%s\n", diff) // prints on err + t.Errorf("unexpected results (-want, +got)\n%s\n", diff) } test.AssertEqual(t, tc.expAbsentHosts, gotResp.Absenthosts, "absent hosts") test.AssertEqual(t, tc.expAbsentRanks, gotResp.Absentranks, "absent ranks") + if diff := cmp.Diff(tc.expProviders, gotResp.Providers); diff != "" { + t.Errorf("unexpected results (-want, +got)\n%s\n", diff) + } }) } } diff --git a/src/engine/drpc_client.c b/src/engine/drpc_client.c index 85efbf8a98d..f3bf1f263f3 100644 --- a/src/engine/drpc_client.c +++ b/src/engine/drpc_client.c @@ -182,7 +182,7 @@ drpc_notify_ready(void) for (i = 0; i < nr_sec_uris; i++) { rc = crt_self_uri_get_secondary(i, &sec_uris[i]); if (rc != 0) { - D_ERROR("failed to get secondary provider URI, idx=%d, rc=%d", + D_ERROR("failed to get secondary provider URI, idx=%d, rc=%d\n", i, rc); nr_sec_uris = i; goto out_sec_uri; @@ -190,13 +190,22 @@ drpc_notify_ready(void) D_DEBUG(DB_MGMT, "secondary provider URI: %s\n", sec_uris[i]); } - D_DEBUG(DB_MGMT, "setting secondary provider URIs"); + D_DEBUG(DB_MGMT, "setting secondary provider URIs\n"); req.secondaryuris = sec_uris; req.n_secondaryuris = 
nr_sec_uris; + + D_DEBUG(DB_MGMT, "setting secondary provider number cart ctxs\n"); + req.n_secondarynctxs = nr_sec_uris; + D_ALLOC_ARRAY(req.secondarynctxs, nr_sec_uris); + if (req.secondarynctxs == NULL) + D_GOTO(out_sec_uri, rc = -DER_NOMEM); + for (i = 0; i < nr_sec_uris; i++) + req.secondarynctxs[i] = dss_sec_xs_nr; } req.incarnation = incarnation; req.nctxs = DSS_CTX_NR_TOTAL; + /* Do not free, this string is managed by the dRPC listener */ req.drpclistenersock = drpc_listener_socket_path; req.instanceidx = dss_instance_idx; @@ -205,7 +214,7 @@ drpc_notify_ready(void) reqb_size = srv__notify_ready_req__get_packed_size(&req); D_ALLOC(reqb, reqb_size); if (reqb == NULL) - D_GOTO(out_sec_uri, rc = -DER_NOMEM); + D_GOTO(out_sec_nctxs, rc = -DER_NOMEM); srv__notify_ready_req__pack(&req, reqb); rc = dss_drpc_call(DRPC_MODULE_SRV, DRPC_METHOD_SRV_NOTIFY_READY, reqb, @@ -221,6 +230,8 @@ drpc_notify_ready(void) drpc_response_free(dresp); out_reqb: D_FREE(reqb); +out_sec_nctxs: + D_FREE(req.secondarynctxs); out_sec_uri: for (i = 0; i < nr_sec_uris; i++) D_FREE(sec_uris[i]); diff --git a/src/engine/srv.pb-c.c b/src/engine/srv.pb-c.c index ffa2348c544..4a4b07c4be1 100644 --- a/src/engine/srv.pb-c.c +++ b/src/engine/srv.pb-c.c @@ -277,7 +277,7 @@ void srv__pool_find_by_label_resp__free_unpacked assert(message->base.descriptor == &srv__pool_find_by_label_resp__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[7] = +static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[8] = { { "uri", @@ -363,6 +363,18 @@ static const ProtobufCFieldDescriptor srv__notify_ready_req__field_descriptors[7 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "secondaryNctxs", + 8, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT32, + offsetof(Srv__NotifyReadyReq, n_secondarynctxs), + offsetof(Srv__NotifyReadyReq, secondarynctxs), + NULL, 
+ NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned srv__notify_ready_req__field_indices_by_name[] = { 2, /* field[2] = drpcListenerSock */ @@ -370,13 +382,14 @@ static const unsigned srv__notify_ready_req__field_indices_by_name[] = { 3, /* field[3] = instanceIdx */ 1, /* field[1] = nctxs */ 4, /* field[4] = ntgts */ + 7, /* field[7] = secondaryNctxs */ 6, /* field[6] = secondaryUris */ 0, /* field[0] = uri */ }; static const ProtobufCIntRange srv__notify_ready_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 7 } + { 0, 8 } }; const ProtobufCMessageDescriptor srv__notify_ready_req__descriptor = { @@ -386,7 +399,7 @@ const ProtobufCMessageDescriptor srv__notify_ready_req__descriptor = "Srv__NotifyReadyReq", "srv", sizeof(Srv__NotifyReadyReq), - 7, + 8, srv__notify_ready_req__field_descriptors, srv__notify_ready_req__field_indices_by_name, 1, srv__notify_ready_req__number_ranges, diff --git a/src/engine/srv.pb-c.h b/src/engine/srv.pb-c.h index 902cd15fa49..8a70d1896ee 100644 --- a/src/engine/srv.pb-c.h +++ b/src/engine/srv.pb-c.h @@ -60,10 +60,15 @@ struct _Srv__NotifyReadyReq */ size_t n_secondaryuris; char **secondaryuris; + /* + * number of CaRT contexts for each secondary provider + */ + size_t n_secondarynctxs; + uint32_t *secondarynctxs; }; #define SRV__NOTIFY_READY_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&srv__notify_ready_req__descriptor) \ - , (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, 0, 0, 0,NULL } + , (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, 0, 0, 0,NULL, 0,NULL } struct _Srv__BioErrorReq diff --git a/src/engine/tests/drpc_client_tests.c b/src/engine/tests/drpc_client_tests.c index b7c5d70706f..410bc03595b 100644 --- a/src/engine/tests/drpc_client_tests.c +++ b/src/engine/tests/drpc_client_tests.c @@ -29,6 +29,7 @@ const char *dss_socket_dir = "/my/fake/path"; char *drpc_listener_socket_path = "/fake/listener.sock"; char 
dss_hostname[DSS_HOSTNAME_MAX_LEN] = "foo-host"; +unsigned int dss_sec_xs_nr = 1; /* DAOS internal globals - arbitrary values okay */ uint32_t dss_tgt_offload_xs_nr = 3; diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index f549232f618..b0d3a509e8f 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -827,7 +827,7 @@ const ProtobufCMessageDescriptor mgmt__group_update_resp__descriptor = (ProtobufCMessageInit) mgmt__group_update_resp__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[10] = +static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[11] = { { "sys", @@ -949,6 +949,18 @@ static const ProtobufCFieldDescriptor mgmt__join_req__field_descriptors[10] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "secondary_nctxs", + 11, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT32, + offsetof(Mgmt__JoinReq, n_secondary_nctxs), + offsetof(Mgmt__JoinReq, secondary_nctxs), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__join_req__field_indices_by_name[] = { 5, /* field[5] = addr */ @@ -956,6 +968,7 @@ static const unsigned mgmt__join_req__field_indices_by_name[] = { 8, /* field[8] = incarnation */ 4, /* field[4] = nctxs */ 2, /* field[2] = rank */ + 10, /* field[10] = secondary_nctxs */ 9, /* field[9] = secondary_uris */ 6, /* field[6] = srvFaultDomain */ 0, /* field[0] = sys */ @@ -965,7 +978,7 @@ static const unsigned mgmt__join_req__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__join_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 10 } + { 0, 11 } }; const ProtobufCMessageDescriptor mgmt__join_req__descriptor = { @@ -975,7 +988,7 @@ const ProtobufCMessageDescriptor mgmt__join_req__descriptor = "Mgmt__JoinReq", "mgmt", sizeof(Mgmt__JoinReq), - 10, + 11, mgmt__join_req__field_descriptors, mgmt__join_req__field_indices_by_name, 1, mgmt__join_req__number_ranges, @@ 
-1422,20 +1435,20 @@ static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__fiel 0,NULL,NULL /* reserved1,reserved2, etc */ }, { - "provider", + "provider_idx", 3, PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_STRING, + PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp__RankUri, provider), + offsetof(Mgmt__GetAttachInfoResp__RankUri, provider_idx), + NULL, NULL, - &protobuf_c_empty_string, 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, }; static const unsigned mgmt__get_attach_info_resp__rank_uri__field_indices_by_name[] = { - 2, /* field[2] = provider */ + 2, /* field[2] = provider_idx */ 0, /* field[0] = rank */ 1, /* field[1] = uri */ }; diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 300e45c77b5..8f1cbbe7b3f 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -145,10 +145,15 @@ struct _Mgmt__JoinReq */ size_t n_secondary_uris; char **secondary_uris; + /* + * CaRT context count for each secondary provider + */ + size_t n_secondary_nctxs; + uint32_t *secondary_nctxs; }; #define MGMT__JOIN_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_req__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0,NULL } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0,NULL, 0,NULL } struct _Mgmt__JoinResp @@ -279,11 +284,11 @@ struct _Mgmt__GetAttachInfoResp__RankUri ProtobufCMessage base; uint32_t rank; char *uri; - char *provider; + uint32_t provider_idx; }; #define MGMT__GET_ATTACH_INFO_RESP__RANK_URI__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__get_attach_info_resp__rank_uri__descriptor) \ - , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } + , 0, (char *)protobuf_c_empty_string, 0 } struct 
_Mgmt__GetAttachInfoResp diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 2abf7084029..9b50ff95f1f 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -41,6 +41,7 @@ message JoinReq { uint32 idx = 8; // Instance index on server node. uint64 incarnation = 9; // rank incarnation repeated string secondary_uris = 10; // URIs for any secondary providers + repeated uint32 secondary_nctxs = 11; // CaRT context count for each secondary provider } message JoinResp { @@ -88,7 +89,7 @@ message GetAttachInfoResp { message RankUri { uint32 rank = 1; string uri = 2; - string provider = 3; + uint32 provider_idx = 3; } repeated RankUri rank_uris = 2; // Rank URIs for the primary provider // These CaRT settings are shared with the diff --git a/src/proto/mgmt/system.proto b/src/proto/mgmt/system.proto index 7cb32425ce5..1ff71c06bd1 100644 --- a/src/proto/mgmt/system.proto +++ b/src/proto/mgmt/system.proto @@ -76,6 +76,7 @@ message SystemQueryResp { repeated SystemMember members = 1; string absentranks = 2; // rankset missing from membership string absenthosts = 3; // hostset missing from membership + repeated string providers = 4; // Providers supported by system in configured order } // SystemEraseReq supplies system erase parameters. diff --git a/src/proto/srv/srv.proto b/src/proto/srv/srv.proto index 28393ac4cad..253aa0c7dd9 100644 --- a/src/proto/srv/srv.proto +++ b/src/proto/srv/srv.proto @@ -19,6 +19,7 @@ message NotifyReadyReq { uint32 ntgts = 5; // number of VOS targets allocated in I/O Engine uint64 incarnation = 6; // HLC incarnation number repeated string secondaryUris = 7; // secondary CaRT URIs + repeated uint32 secondaryNctxs = 8; // number of CaRT contexts for each secondary provider } // NotifyReadyResp is nil. 
diff --git a/utils/config/daos_agent.yml b/utils/config/daos_agent.yml index ca29e582a3f..96a47570f83 100644 --- a/utils/config/daos_agent.yml +++ b/utils/config/daos_agent.yml @@ -74,10 +74,10 @@ ## default: false #disable_caching: true -# Manually define the fabric interfaces and domains to be used by the agent, -# organized by NUMA node. -# If not defined, the agent will automatically detect all fabric interfaces and -# select appropriate ones based on the server preferences. +## Manually define the fabric interfaces and domains to be used by the agent, +## organized by NUMA node. +## If not defined, the agent will automatically detect all fabric interfaces and +## select appropriate ones based on the server preferences. # #fabric_ifaces: #- @@ -99,7 +99,14 @@ # iface: ib3 # domain: mlx5_3 -# Manually force a specific fabric provider to be used by all clients, in the event that the server -# supports multiple providers. +## Select the fabric provider to be used by all clients, if the server is configured with +## multiple providers. +## The provider index is 0-based and based on the order of fabric providers listed in the +## "provider" field of the corresponding daos_server configuration. An administrator can +## see the list of configured server providers and their indices by using the "dmg system +## query" command. +## Index 0 corresponds to the primary provider. Any index > 0 is a secondary provider. # -#provider: ofi+verbs +## default: 0 +# +#provider_idx: 1 From 51dbe4912b79414d96d823fb3787539017974a28 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Fri, 15 Jul 2022 13:57:29 -0700 Subject: [PATCH 19/28] CART-89 multiprov: Auto-tag replacement for secondary providers (#9519) * CART-89 multiprov: Auto-tag replacement for secondary providers - When the client is secondary, we do not want to use target endpoint information for deciding which secondary tag to send rpc to. Instead we send it to secondary tag=0. 
In future API will be added to control total number of server tags, and instead of 0 a random tag will be chosen. intended destination tag is present in the rpc header and available to be retrieved via apis. - test updated to try new apis and test behavior Signed-off-by: Alexander A Oganezov --- src/cart/crt_rpc.c | 32 ++++++++++++++++----- src/tests/ftest/cart/dual_provider_client.c | 2 +- src/tests/ftest/cart/dual_provider_common.h | 19 ++++++++++-- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/cart/crt_rpc.c b/src/cart/crt_rpc.c index 1287db77803..14dd67e9eb9 100644 --- a/src/cart/crt_rpc.c +++ b/src/cart/crt_rpc.c @@ -251,7 +251,8 @@ crt_internal_rpc_register(bool server) return rc; } - /* TODO: The self-test protocols should not be registered on the client + /* + * TODO: The self-test protocols should not be registered on the client * by default. */ @@ -480,8 +481,9 @@ crt_rpc_priv_set_ep(struct crt_rpc_priv *rpc_priv, crt_endpoint_t *tgt_ep) } else { rpc_priv->crp_pub.cr_ep.ep_grp = tgt_ep->ep_grp; } - rpc_priv->crp_pub.cr_ep.ep_rank = tgt_ep->ep_rank; + rpc_priv->crp_pub.cr_ep.ep_tag = tgt_ep->ep_tag; + rpc_priv->crp_pub.cr_ep.ep_rank = tgt_ep->ep_rank; rpc_priv->crp_have_ep = 1; } @@ -524,6 +526,7 @@ crt_req_create_internal(crt_context_t crt_ctx, crt_endpoint_t *tgt_ep, if (tgt_ep != NULL) { rc = check_ep(tgt_ep, &grp_priv); + if (rc != 0) D_GOTO(out, rc); @@ -1104,6 +1107,7 @@ crt_req_ep_lc_lookup(struct crt_rpc_priv *rpc_priv, bool *uri_exists) crt_phy_addr_t uri = NULL; int rc = 0; crt_phy_addr_t base_addr = NULL; + int dst_tag; req = &rpc_priv->crp_pub; ctx = req->cr_ctx; @@ -1112,22 +1116,32 @@ crt_req_ep_lc_lookup(struct crt_rpc_priv *rpc_priv, bool *uri_exists) *uri_exists = false; grp_priv = crt_grp_pub2priv(tgt_ep->ep_grp); + dst_tag = tgt_ep->ep_tag; + + if (!crt_gdata.cg_provider_is_primary) { + /* + * TODO: Add API to set number of destination tags + * for the secondary provider + */ + dst_tag = 0; + } + 
crt_grp_lc_lookup(grp_priv, ctx->cc_idx, - tgt_ep->ep_rank, tgt_ep->ep_tag, &base_addr, + tgt_ep->ep_rank, dst_tag, &base_addr, &rpc_priv->crp_hg_addr); if (base_addr == NULL && rpc_priv->crp_hg_addr == NULL) { if (crt_req_is_self(rpc_priv)) { - rc = crt_self_uri_get(tgt_ep->ep_tag, &uri); + rc = crt_self_uri_get(dst_tag, &uri); if (rc != DER_SUCCESS) { D_ERROR("crt_self_uri_get(tag: %d) failed, " - "rc %d\n", tgt_ep->ep_tag, rc); + "rc %d\n", dst_tag, rc); D_GOTO(out, rc); } rc = crt_grp_lc_uri_insert(grp_priv, tgt_ep->ep_rank, - tgt_ep->ep_tag, base_addr); + dst_tag, base_addr); if (rc != 0) D_GOTO(out, rc); @@ -1155,7 +1169,7 @@ crt_req_ep_lc_lookup(struct crt_rpc_priv *rpc_priv, bool *uri_exists) if (base_addr == NULL && !crt_is_service()) { D_RWLOCK_RDLOCK(&grp_priv->gp_rwlock); if (tgt_ep->ep_rank == grp_priv->gp_psr_rank && - tgt_ep->ep_tag == 0) { + dst_tag == 0) { D_STRNDUP(uri, grp_priv->gp_psr_phy_addr, CRT_ADDR_STR_MAX_LEN); D_RWLOCK_UNLOCK(&grp_priv->gp_rwlock); @@ -1702,6 +1716,10 @@ crt_rpc_common_hdlr(struct crt_rpc_priv *rpc_priv) skip_check = true; } + /* Skip check for secondary provider clients */ + if (!rpc_priv->crp_req_hdr.cch_src_is_primary) + skip_check = true; + if ((self_rank != rpc_priv->crp_req_hdr.cch_dst_rank) || (crt_ctx->cc_idx != rpc_priv->crp_req_hdr.cch_dst_tag)) { if (!skip_check) { diff --git a/src/tests/ftest/cart/dual_provider_client.c b/src/tests/ftest/cart/dual_provider_client.c index 86749844cc3..9b8a20b3338 100644 --- a/src/tests/ftest/cart/dual_provider_client.c +++ b/src/tests/ftest/cart/dual_provider_client.c @@ -73,7 +73,7 @@ int main(int argc, char **argv) int num_remote_tags; bool use_primary = true; - while ((c = getopt(argc, argv, "i:p:d:s")) != -1) { + while ((c = getopt(argc, argv, "i:p:d:c:s")) != -1) { switch (c) { case 'i': arg_interface = optarg; diff --git a/src/tests/ftest/cart/dual_provider_common.h b/src/tests/ftest/cart/dual_provider_common.h index b5b2c5004b9..6d0d80680f0 100644 --- 
a/src/tests/ftest/cart/dual_provider_common.h +++ b/src/tests/ftest/cart/dual_provider_common.h @@ -118,7 +118,8 @@ handler_ping(crt_rpc_t *rpc) crt_context_t *ctx; int rc = 0; bool primary_origin = false; - + int my_tag; + uint32_t hdr_dst_tag; input = crt_req_get(rpc); output = crt_reply_get(rpc); @@ -128,7 +129,6 @@ handler_ping(crt_rpc_t *rpc) ctx = rpc->cr_ctx; - rc = crt_req_src_provider_is_primary(rpc, &primary_origin); if (rc != 0) { @@ -136,8 +136,21 @@ handler_ping(crt_rpc_t *rpc) error_exit(); } - DBG_PRINT("RPC arived on a %s context; origin was %s\n", + rc = crt_req_dst_tag_get(rpc, &hdr_dst_tag); + if (rc != 0) { + D_ERROR("crt_req_dst_tag_get() failed; rc=%d\n", rc); + error_exit(); + } + + rc = crt_context_idx(rpc->cr_ctx, &my_tag); + if (rc != 0) { + D_ERROR("crt_context_idx() failed; rc=%d\n", rc); + error_exit(); + } + + DBG_PRINT("RPC arrived on a %s context (idx=%d intended_tag=%d); origin was %s\n", crt_context_is_primary(ctx) ? "primary" : "secondary", + my_tag, hdr_dst_tag, primary_origin ? "primary" : "secondary"); /* TODO: Change this to rank == 2 when bulk support is added */ From 20e9fa78053a41cb2066fe0854612f6557e2c539 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Thu, 13 Oct 2022 16:26:04 -0700 Subject: [PATCH 20/28] DAOS-11884 cart: Add new API to set a number of remote endpoints (#10532) - Add new API to set number of remote endpoints - CART will round-robin around all remote endpoints when sending RPCs over a secondary provider. Defaults to 1 endpoint (tag=0). 
Signed-off-by: Alexander A Oganezov --- src/cart/crt_group.c | 29 +++++++++++++++++++++ src/cart/crt_init.c | 27 +++++++++++++++---- src/cart/crt_internal_types.h | 7 +++++ src/cart/crt_rpc.c | 15 +++++++---- src/include/cart/api.h | 18 +++++++++++++ src/tests/ftest/cart/dual_provider_server.c | 4 +-- 6 files changed, 88 insertions(+), 12 deletions(-) diff --git a/src/cart/crt_group.c b/src/cart/crt_group.c index cab5d924c81..2ded80ba87b 100644 --- a/src/cart/crt_group.c +++ b/src/cart/crt_group.c @@ -1849,6 +1849,35 @@ crt_group_config_path_set(const char *path) return 0; } +int +crt_nr_secondary_remote_tags_set(int idx, int num_tags) +{ + struct crt_prov_gdata *prov_data; + + D_DEBUG(DB_ALL, "secondary_idx=%d num_tags=%d\n", idx, num_tags); + + if (idx != 0) { + D_ERROR("Only idx=0 is currently supported\n"); + return -DER_NONEXIST; + } + + if ((crt_gdata.cg_prov_gdata_secondary == NULL) || + (idx >= crt_gdata.cg_num_secondary_provs)) { + D_ERROR("Secondary providers not initialized\n"); + return -DER_NONEXIST; + } + + if (num_tags <= 0) { + D_ERROR("Invalid number of tags: %d\n", num_tags); + return -DER_INVAL; + } + + prov_data = &crt_gdata.cg_prov_gdata_secondary[idx]; + prov_data->cpg_num_remote_tags = num_tags; + + return DER_SUCCESS; +} + /** * Save attach info to file with the name * "/grpid.attach_info_tmp". 
diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index bb1c9464c94..45f8bb262f2 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -101,7 +101,7 @@ mem_pin_workaround(void) return; } -static void +static int prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, bool primary, crt_init_options_t *opt) @@ -112,6 +112,11 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, uint32_t max_expect_size = 0; uint32_t max_unexpect_size = 0; uint32_t max_num_ctx = 256; + int rc; + + rc = D_MUTEX_INIT(&prov_data->cpg_mutex, NULL); + if (rc != 0) + return rc; /* Assume for now this option is only available for a primary provider */ if (primary) { @@ -148,10 +153,16 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, prov_data->cpg_max_unexp_size = max_unexpect_size; prov_data->cpg_primary = primary; + /* By default set number of secondary remote tags to 1 */ + prov_data->cpg_num_remote_tags = 1; + prov_data->cpg_last_remote_tag = 0; + D_DEBUG(DB_ALL, "prov_idx: %d primary: %d sep_mode: %d sizes: (%d/%d)\n", provider, primary, set_sep, max_expect_size, max_unexpect_size); D_INIT_LIST_HEAD(&prov_data->cpg_ctx_list); + + return DER_SUCCESS; } /* first step init - for initializing crt_gdata */ @@ -650,8 +661,11 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_GOTO(cleanup, rc = -DER_INVAL); } - prov_data_init(&crt_gdata.cg_prov_gdata_primary, - primary_provider, true, opt); + rc = prov_data_init(&crt_gdata.cg_prov_gdata_primary, + primary_provider, true, opt); + if (rc != 0) + D_GOTO(cleanup, rc); + prov_settings_apply(true, primary_provider, opt); crt_gdata.cg_primary_prov = primary_provider; @@ -683,8 +697,11 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) for (i = 0; i < num_secondaries; i++) { tmp_prov = crt_gdata.cg_secondary_provs[i]; - prov_data_init(&crt_gdata.cg_prov_gdata_secondary[i], - tmp_prov, false, opt); + rc = 
prov_data_init(&crt_gdata.cg_prov_gdata_secondary[i], + tmp_prov, false, opt); + if (rc != 0) + D_GOTO(cleanup, rc); + prov_settings_apply(false, tmp_prov, opt); rc = crt_na_config_init(false, tmp_prov, iface1, domain1, port1); diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 19f5897b1e9..0adbf26ddb3 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -53,11 +53,18 @@ struct crt_prov_gdata { uint32_t cpg_max_exp_size; uint32_t cpg_max_unexp_size; + /** Number of remote tags */ + uint32_t cpg_num_remote_tags; + uint32_t cpg_last_remote_tag; + /** Set of flags */ unsigned int cpg_sep_mode : 1, cpg_primary : 1, cpg_contig_ports : 1, cpg_inited : 1; + + /** Mutext to protect fields above */ + pthread_mutex_t cpg_mutex; }; #define MAX_NUM_SECONDARY_PROVS 2 diff --git a/src/cart/crt_rpc.c b/src/cart/crt_rpc.c index 14dd67e9eb9..c96b49c547d 100644 --- a/src/cart/crt_rpc.c +++ b/src/cart/crt_rpc.c @@ -1107,6 +1107,7 @@ crt_req_ep_lc_lookup(struct crt_rpc_priv *rpc_priv, bool *uri_exists) crt_phy_addr_t uri = NULL; int rc = 0; crt_phy_addr_t base_addr = NULL; + struct crt_prov_gdata *prov_data; int dst_tag; req = &rpc_priv->crp_pub; @@ -1118,12 +1119,16 @@ crt_req_ep_lc_lookup(struct crt_rpc_priv *rpc_priv, bool *uri_exists) dst_tag = tgt_ep->ep_tag; + /* For a secondary provider round-robin between all available remote contexts */ if (!crt_gdata.cg_provider_is_primary) { - /* - * TODO: Add API to set number of destination tags - * for the secondary provider - */ - dst_tag = 0; + + prov_data = &crt_gdata.cg_prov_gdata_secondary[0]; + + D_MUTEX_LOCK(&prov_data->cpg_mutex); + prov_data->cpg_last_remote_tag++; + prov_data->cpg_last_remote_tag %= prov_data->cpg_num_remote_tags; + dst_tag = prov_data->cpg_last_remote_tag; + D_MUTEX_UNLOCK(&prov_data->cpg_mutex); } crt_grp_lc_lookup(grp_priv, ctx->cc_idx, diff --git a/src/include/cart/api.h b/src/include/cart/api.h index de3437c4e47..85e44033773 100644 --- 
a/src/include/cart/api.h +++ b/src/include/cart/api.h @@ -2154,6 +2154,24 @@ int crt_group_info_get(crt_group_t *group, d_iov_t *grp_info); */ int crt_group_info_set(d_iov_t *grp_info); +/** + * Sets the number of the remote tags for the secondary provider. + * + * Each tag corresponds to a remote context or an endpoint. + * By default, CaRT assumes 1 remote tag for each secondary provider. + * + * When a number of tags is set to more than 1 for any secondary provider then + * CaRT will round-robin across all secondary provider tags instead of defaulting + * to endpoint0 for secondary provider communications. + * + * \param[in] idx Secondary provider index. Currently only 0 is supported. + * \param[in] num_tags Number of remote tags to set. + * + * \return DER_SUCCESS on success, negative value + * on failure. + */ +int crt_nr_secondary_remote_tags_set(int idx, int num_tags); + /** * Retrieve list of ranks that belong to the specified group. * diff --git a/src/tests/ftest/cart/dual_provider_server.c b/src/tests/ftest/cart/dual_provider_server.c index aff16da7f08..b53c1897b8a 100644 --- a/src/tests/ftest/cart/dual_provider_server.c +++ b/src/tests/ftest/cart/dual_provider_server.c @@ -190,13 +190,13 @@ int main(int argc, char **argv) } if (num_primary_ctx > NUM_PRIMARY_CTX_MAX) { - printf("Error: Exceeded max alllowed %d for primary ctx\n", + printf("Error: Exceeded max allowed %d for primary ctx\n", NUM_PRIMARY_CTX_MAX); return -1; } if (num_secondary_ctx > NUM_SECONDARY_CTX_MAX) { - printf("Error: Exceeded max alllowed %d for secondary ctx\n", + printf("Error: Exceeded max allowed %d for secondary ctx\n", NUM_SECONDARY_CTX_MAX); return -1; } From 780d65b2160a37ecbc1a6766c73220bd6f45054f Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Fri, 14 Oct 2022 14:18:15 -0600 Subject: [PATCH 21/28] DAOS-11893 control: Include num ctxs in GetAttachInfo (#10544) * DAOS-11893 control: Include num ctxs in GetAttachInfo - Save number of secondary provider contexts in Management 
Service. - Include the number of contexts for each provider with the rank URIs in GetAttachInfo. - Update Raft unit test fixtures for the MS updates. Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/mgmt_rpc.go | 2 +- src/control/common/proto/mgmt/svc.pb.go | 53 ++++++----- src/control/lib/control/network.go | 1 + src/control/server/mgmt_system.go | 24 +++-- src/control/server/mgmt_system_test.go | 24 +++-- src/control/system/member.go | 23 ++--- src/control/system/membership.go | 39 ++++---- src/control/system/membership_test.go | 6 +- src/control/system/mocks.go | 2 +- src/control/system/raft/database.go | 16 ++-- src/control/system/raft/database_test.go | 88 +++++++++++++++--- .../testdata/raft_recovery/daos_system.db | Bin 65536 -> 65536 bytes .../snapshots/2-11-1664895530778/state.bin | 1 - .../meta.json | 2 +- .../snapshots/2-11-1665528548388/state.bin | 1 + .../snapshots/2-19-1664895532148/state.bin | 1 - .../meta.json | 2 +- .../snapshots/2-19-1665528549936/state.bin | 1 + src/mgmt/svc.pb-c.c | 19 +++- src/mgmt/svc.pb-c.h | 3 +- src/proto/mgmt/svc.proto | 1 + 21 files changed, 208 insertions(+), 101 deletions(-) delete mode 100644 src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1664895530778/state.bin rename src/control/system/raft/testdata/raft_recovery/snapshots/{2-11-1664895530778 => 2-11-1665528548388}/meta.json (51%) create mode 100644 src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/state.bin delete mode 100644 src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1664895532148/state.bin rename src/control/system/raft/testdata/raft_recovery/snapshots/{2-19-1664895532148 => 2-19-1665528549936}/meta.json (51%) create mode 100644 src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/state.bin diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index c3202f760f0..d8bd2da722e 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ 
b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -213,7 +213,6 @@ func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, numaNode int, sys func (mod *mgmtModule) selectAttachInfo(ctx context.Context, srvResp *mgmtpb.GetAttachInfoResp, iface, domain string) (*mgmtpb.GetAttachInfoResp, error) { reqProviders := mod.getIfaceProviders(ctx, iface, domain) - mod.log.Debugf("requested interface %q (domain: %q) supports providers: %s", iface, domain, strings.Join(reqProviders.ToSlice(), ", ")) if mod.providerIdx > 0 { // Secondary provider indices begin at 1 @@ -267,6 +266,7 @@ func (mod *mgmtModule) getIfaceProviders(ctx context.Context, iface, domain stri providers.Add(fis.Providers()...) } + mod.log.Debugf("requested interface %q (domain: %q) supports providers: %s", iface, domain, strings.Join(providers.ToSlice(), ", ")) return providers } diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 3dd0d5a57a2..e6a5303525c 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -1077,6 +1077,7 @@ type GetAttachInfoResp_RankUri struct { Rank uint32 `protobuf:"varint,1,opt,name=rank,proto3" json:"rank,omitempty"` Uri string `protobuf:"bytes,2,opt,name=uri,proto3" json:"uri,omitempty"` ProviderIdx uint32 `protobuf:"varint,3,opt,name=provider_idx,json=providerIdx,proto3" json:"provider_idx,omitempty"` + NumCtxs uint32 `protobuf:"varint,4,opt,name=num_ctxs,json=numCtxs,proto3" json:"num_ctxs,omitempty"` } func (x *GetAttachInfoResp_RankUri) Reset() { @@ -1132,6 +1133,13 @@ func (x *GetAttachInfoResp_RankUri) GetProviderIdx() uint32 { return 0 } +func (x *GetAttachInfoResp_RankUri) GetNumCtxs() uint32 { + if x != nil { + return x.NumCtxs + } + return 0 +} + var File_mgmt_svc_proto protoreflect.FileDescriptor var file_mgmt_svc_proto_rawDesc = []byte{ @@ -1216,7 +1224,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 
0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x70, - 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, 0x78, 0x22, 0xb8, 0x03, 0x0a, 0x11, 0x47, + 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x49, 0x64, 0x78, 0x22, 0xd3, 0x03, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, @@ -1239,31 +1247,32 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x17, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, - 0x73, 0x1a, 0x52, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, + 0x73, 0x1a, 0x6d, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, - 0x65, 0x72, 0x49, 0x64, 0x78, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, - 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, - 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 
0x12, 0x0a, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, - 0x20, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, - 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, - 0x6b, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, - 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, - 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, - 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, - 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, - 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, - 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, - 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, - 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, - 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, - 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, + 0x65, 0x72, 0x49, 0x64, 0x78, 0x12, 0x19, 0x0a, 0x08, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x74, 0x78, + 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x6e, 0x75, 0x6d, 0x43, 0x74, 0x78, 0x73, + 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, 0x77, 0x6e, + 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, 0x67, 
0x52, + 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x20, 0x0a, 0x0a, 0x53, 0x65, + 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x7c, 0x0a, 0x0e, + 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, + 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, + 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, + 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, + 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, + 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/control/network.go b/src/control/lib/control/network.go index 140db0ada4c..cd78c8672a5 100644 --- a/src/control/lib/control/network.go +++ b/src/control/lib/control/network.go @@ -210,6 +210,7 @@ type ( Rank uint32 Uri string ProviderIdx uint32 `json:"provider_idx"` + NumCtxs uint32 `json:"num_ctxs"` } ClientNetworkHint struct { diff --git a/src/control/server/mgmt_system.go 
b/src/control/server/mgmt_system.go index 1ce0e36585d..ba25e7a7b6d 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -79,8 +79,9 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo } resp.RankUris = append(resp.RankUris, &mgmtpb.GetAttachInfoResp_RankUri{ - Rank: rank.Uint32(), - Uri: entry.PrimaryURI, + Rank: rank.Uint32(), + Uri: entry.PrimaryURI, + NumCtxs: entry.NumPrimaryCtxs, }) for i, uri := range entry.SecondaryURIs { @@ -88,6 +89,7 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo Rank: rank.Uint32(), Uri: uri, ProviderIdx: uint32(i + 1), + NumCtxs: entry.NumSecondaryCtxs[i], } resp.SecondaryRankUris = append(resp.SecondaryRankUris, rankURI) @@ -98,6 +100,7 @@ func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfo if len(svc.clientNetworkHint) > 1 { resp.SecondaryClientNetHints = svc.clientNetworkHint[1:] } + resp.MsRanks = ranklist.RanksToUint32(groupMap.MSRanks) // For resp.RankUris may be large, we make a resp copy with a limited @@ -290,14 +293,15 @@ func (svc *mgmtSvc) join(ctx context.Context, req *batchJoinRequest) *batchJoinR } joinResponse, err := svc.membership.Join(&system.JoinRequest{ - Rank: ranklist.Rank(req.Rank), - UUID: uuid, - ControlAddr: req.peerAddr, - PrimaryFabricURI: req.GetUri(), - SecondaryFabricURIs: req.GetSecondaryUris(), - FabricContexts: req.GetNctxs(), - FaultDomain: fd, - Incarnation: req.GetIncarnation(), + Rank: ranklist.Rank(req.Rank), + UUID: uuid, + ControlAddr: req.peerAddr, + PrimaryFabricURI: req.GetUri(), + SecondaryFabricURIs: req.GetSecondaryUris(), + FabricContexts: req.GetNctxs(), + SecondaryFabricContexts: req.GetSecondaryNctxs(), + FaultDomain: fd, + Incarnation: req.GetIncarnation(), }) if err != nil { return &batchJoinResponse{joinErr: err} diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 27be8a55fee..5e4eb8bad7c 100644 --- 
a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -112,12 +112,14 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.PrimaryFabricURI, + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, + NumCtxs: 0, }, { - Rank: nonReplica.Rank.Uint32(), - Uri: nonReplica.PrimaryFabricURI, + Rank: nonReplica.Rank.Uint32(), + Uri: nonReplica.PrimaryFabricURI, + NumCtxs: 1, }, }, MsRanks: []uint32{0}, @@ -143,12 +145,14 @@ func TestServer_MgmtSvc_GetAttachInfo(t *testing.T) { }, RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ { - Rank: msReplica.Rank.Uint32(), - Uri: msReplica.PrimaryFabricURI, + Rank: msReplica.Rank.Uint32(), + Uri: msReplica.PrimaryFabricURI, + NumCtxs: 0, }, { - Rank: nonReplica.Rank.Uint32(), - Uri: nonReplica.PrimaryFabricURI, + Rank: nonReplica.Rank.Uint32(), + Uri: nonReplica.PrimaryFabricURI, + NumCtxs: 1, }, }, MsRanks: []uint32{0}, @@ -464,7 +468,7 @@ func mockMember(t *testing.T, r, a int32, s string) *system.Member { uri := fmt.Sprintf("tcp://%s", addr) m := system.MockMemberFullSpec(t, ranklist.Rank(r), test.MockUUID(r), uri, addr, state) - m.FabricContexts = uint32(r) + m.PrimaryFabricContexts = uint32(r) m.FaultDomain = fd m.Incarnation = uint64(r) @@ -2077,7 +2081,7 @@ func TestServer_MgmtSvc_Join(t *testing.T) { tc.req.SrvFaultDomain = newMember.FaultDomain.String() } if tc.req.Nctxs == 0 { - tc.req.Nctxs = newMember.FabricContexts + tc.req.Nctxs = newMember.PrimaryFabricContexts } if tc.req.Incarnation == 0 { tc.req.Incarnation = newMember.Incarnation diff --git a/src/control/system/member.go b/src/control/system/member.go index 47f822da26d..97714ebebd4 100644 --- a/src/control/system/member.go +++ b/src/control/system/member.go @@ -161,17 +161,18 @@ func (ms MemberState) isTransitionIllegal(to MemberState) bool { // Member refers to a data-plane instance that is a member of this DAOS // 
system running on host with the control-plane listening at "Addr". type Member struct { - Rank ranklist.Rank `json:"rank"` - Incarnation uint64 `json:"incarnation"` - UUID uuid.UUID `json:"uuid"` - Addr *net.TCPAddr `json:"addr"` - PrimaryFabricURI string `json:"fabric_uri"` - SecondaryFabricURIs []string `json:"secondary_fabric_uris"` - FabricContexts uint32 `json:"fabric_contexts"` - State MemberState `json:"-"` - Info string `json:"info"` - FaultDomain *FaultDomain `json:"fault_domain"` - LastUpdate time.Time `json:"last_update"` + Rank ranklist.Rank `json:"rank"` + Incarnation uint64 `json:"incarnation"` + UUID uuid.UUID `json:"uuid"` + Addr *net.TCPAddr `json:"addr"` + PrimaryFabricURI string `json:"fabric_uri"` + SecondaryFabricURIs []string `json:"secondary_fabric_uris"` + PrimaryFabricContexts uint32 `json:"fabric_contexts"` + SecondaryFabricContexts []uint32 `json:"secondary_fabric_contexts"` + State MemberState `json:"-"` + Info string `json:"info"` + FaultDomain *FaultDomain `json:"fault_domain"` + LastUpdate time.Time `json:"last_update"` } // MarshalJSON marshals system.Member to JSON. diff --git a/src/control/system/membership.go b/src/control/system/membership.go index b5cffac6459..664eac20b07 100644 --- a/src/control/system/membership.go +++ b/src/control/system/membership.go @@ -95,14 +95,15 @@ func (m *Membership) Count() (int, error) { // JoinRequest contains information needed for join membership update. type JoinRequest struct { - Rank Rank - UUID uuid.UUID - ControlAddr *net.TCPAddr - PrimaryFabricURI string - SecondaryFabricURIs []string - FabricContexts uint32 - FaultDomain *FaultDomain - Incarnation uint64 + Rank Rank + UUID uuid.UUID + ControlAddr *net.TCPAddr + PrimaryFabricURI string + SecondaryFabricURIs []string + FabricContexts uint32 + SecondaryFabricContexts []uint32 + FaultDomain *FaultDomain + Incarnation uint64 } // JoinResponse contains information returned from join membership update. 
@@ -169,7 +170,8 @@ func (m *Membership) Join(req *JoinRequest) (resp *JoinResponse, err error) { curMember.Addr = req.ControlAddr curMember.PrimaryFabricURI = req.PrimaryFabricURI curMember.SecondaryFabricURIs = req.SecondaryFabricURIs - curMember.FabricContexts = req.FabricContexts + curMember.PrimaryFabricContexts = req.FabricContexts + curMember.SecondaryFabricContexts = req.SecondaryFabricContexts curMember.FaultDomain = req.FaultDomain curMember.Incarnation = req.Incarnation if err := m.db.UpdateMember(curMember); err != nil { @@ -194,15 +196,16 @@ func (m *Membership) Join(req *JoinRequest) (resp *JoinResponse, err error) { } newMember := &Member{ - Rank: req.Rank, - Incarnation: req.Incarnation, - UUID: req.UUID, - Addr: req.ControlAddr, - PrimaryFabricURI: req.PrimaryFabricURI, - SecondaryFabricURIs: req.SecondaryFabricURIs, - FabricContexts: req.FabricContexts, - FaultDomain: req.FaultDomain, - State: MemberStateJoined, + Rank: req.Rank, + Incarnation: req.Incarnation, + UUID: req.UUID, + Addr: req.ControlAddr, + PrimaryFabricURI: req.PrimaryFabricURI, + SecondaryFabricURIs: req.SecondaryFabricURIs, + PrimaryFabricContexts: req.FabricContexts, + SecondaryFabricContexts: req.SecondaryFabricContexts, + FaultDomain: req.FaultDomain, + State: MemberStateJoined, } if err := m.db.AddMember(newMember); err != nil { return nil, errors.Wrap(err, "failed to add new member") diff --git a/src/control/system/membership_test.go b/src/control/system/membership_test.go index 3d5df1dff04..f555c99ccbb 100644 --- a/src/control/system/membership_test.go +++ b/src/control/system/membership_test.go @@ -761,7 +761,7 @@ func TestSystem_Membership_Join(t *testing.T) { UUID: newMember.UUID, ControlAddr: newMember.Addr, PrimaryFabricURI: newMember.Addr.String(), - FabricContexts: newMember.FabricContexts, + FabricContexts: newMember.PrimaryFabricContexts, FaultDomain: newMember.FaultDomain, }, expResp: &JoinResponse{ @@ -792,7 +792,7 @@ func TestSystem_Membership_Join(t *testing.T) 
{ UUID: newMemberShallowFD.UUID, ControlAddr: newMemberShallowFD.Addr, PrimaryFabricURI: curMember.Addr.String(), - FabricContexts: newMemberShallowFD.FabricContexts, + FabricContexts: newMemberShallowFD.PrimaryFabricContexts, FaultDomain: newMemberShallowFD.FaultDomain, }, expErr: FaultBadFaultDomainDepth(newMemberShallowFD.FaultDomain, curMember.FaultDomain.NumLevels()), @@ -803,7 +803,7 @@ func TestSystem_Membership_Join(t *testing.T) { UUID: curMember.UUID, ControlAddr: curMember.Addr, PrimaryFabricURI: curMember.Addr.String(), - FabricContexts: curMember.FabricContexts, + FabricContexts: curMember.PrimaryFabricContexts, FaultDomain: shallowFD, }, expErr: FaultBadFaultDomainDepth(newMemberShallowFD.FaultDomain, curMember.FaultDomain.NumLevels()), diff --git a/src/control/system/mocks.go b/src/control/system/mocks.go index 9a1a4202b40..912eb1ae20f 100644 --- a/src/control/system/mocks.go +++ b/src/control/system/mocks.go @@ -57,7 +57,7 @@ func MockMember(t *testing.T, idx uint32, state MemberState, info ...string) *Me addr := MockControlAddr(t, idx) m := MockMemberFullSpec(t, Rank(idx), test.MockUUID(int32(idx)), addr.String(), addr, state) - m.FabricContexts = idx + m.PrimaryFabricContexts = idx if len(info) > 0 { m.Info = info[0] } diff --git a/src/control/system/raft/database.go b/src/control/system/raft/database.go index 6b84bb53eb6..375d143cde4 100644 --- a/src/control/system/raft/database.go +++ b/src/control/system/raft/database.go @@ -115,9 +115,11 @@ type ( // RankEntry comprises the information about a rank in GroupMap. RankEntry struct { - PrimaryURI string - SecondaryURIs []string - Incarnation uint64 + PrimaryURI string + NumPrimaryCtxs uint32 + SecondaryURIs []string + NumSecondaryCtxs []uint32 + Incarnation uint64 } // RaftComponents holds the components required to start a raft instance. 
@@ -559,9 +561,11 @@ func (db *Database) GroupMap() (*GroupMap, error) { } gm.RankEntries[srv.Rank] = RankEntry{ - PrimaryURI: srv.PrimaryFabricURI, - SecondaryURIs: srv.SecondaryFabricURIs, - Incarnation: srv.Incarnation, + PrimaryURI: srv.PrimaryFabricURI, + NumPrimaryCtxs: srv.PrimaryFabricContexts, + SecondaryURIs: srv.SecondaryFabricURIs, + NumSecondaryCtxs: srv.SecondaryFabricContexts, + Incarnation: srv.Incarnation, } if db.isReplica(srv.Addr) { gm.MSRanks = append(gm.MSRanks, srv.Rank) diff --git a/src/control/system/raft/database_test.go b/src/control/system/raft/database_test.go index 6da26259987..ec1c00bc935 100644 --- a/src/control/system/raft/database_test.go +++ b/src/control/system/raft/database_test.go @@ -904,14 +904,38 @@ func TestSystem_Database_GroupMap(t *testing.T) { expGroupMap: &GroupMap{ Version: 11, RankEntries: map[Rank]RankEntry{ - 0: {PrimaryURI: MockControlAddr(t, 0).String()}, - 2: {PrimaryURI: MockControlAddr(t, 2).String()}, - 3: {PrimaryURI: MockControlAddr(t, 3).String()}, - 4: {PrimaryURI: MockControlAddr(t, 4).String()}, - 5: {PrimaryURI: MockControlAddr(t, 5).String()}, - 6: {PrimaryURI: MockControlAddr(t, 6).String()}, - 9: {PrimaryURI: MockControlAddr(t, 9).String()}, - 10: {PrimaryURI: MockControlAddr(t, 10).String()}, + 0: { + PrimaryURI: MockControlAddr(t, 0).String(), + NumPrimaryCtxs: 0, + }, + 2: { + PrimaryURI: MockControlAddr(t, 2).String(), + NumPrimaryCtxs: 2, + }, + 3: { + PrimaryURI: MockControlAddr(t, 3).String(), + NumPrimaryCtxs: 3, + }, + 4: { + PrimaryURI: MockControlAddr(t, 4).String(), + NumPrimaryCtxs: 4, + }, + 5: { + PrimaryURI: MockControlAddr(t, 5).String(), + NumPrimaryCtxs: 5, + }, + 6: { + PrimaryURI: MockControlAddr(t, 6).String(), + NumPrimaryCtxs: 6, + }, + 9: { + PrimaryURI: MockControlAddr(t, 9).String(), + NumPrimaryCtxs: 9, + }, + 10: { + PrimaryURI: MockControlAddr(t, 10).String(), + NumPrimaryCtxs: 10, + }, }, }, }, @@ -920,8 +944,14 @@ func TestSystem_Database_GroupMap(t *testing.T) { 
expGroupMap: &GroupMap{ Version: 2, RankEntries: map[Rank]RankEntry{ - 0: {PrimaryURI: MockControlAddr(t, 0).String()}, - 1: {PrimaryURI: MockControlAddr(t, 1).String()}, + 0: { + PrimaryURI: MockControlAddr(t, 0).String(), + NumPrimaryCtxs: 0, + }, + 1: { + PrimaryURI: MockControlAddr(t, 1).String(), + NumPrimaryCtxs: 1, + }, }, MSRanks: []Rank{1}, }, @@ -933,7 +963,43 @@ func TestSystem_Database_GroupMap(t *testing.T) { expGroupMap: &GroupMap{ Version: 2, RankEntries: map[Rank]RankEntry{ - 0: {PrimaryURI: MockControlAddr(t, 0).String()}, + 0: { + PrimaryURI: MockControlAddr(t, 0).String(), + NumPrimaryCtxs: 0, + }, + }, + }, + }, + "secondary URIs": { + members: []*Member{ + { + Rank: 2, + UUID: uuid.MustParse(test.MockUUID(2)), + PrimaryFabricURI: MockControlAddr(t, 2).String(), + PrimaryFabricContexts: 8, + SecondaryFabricURIs: []string{ + MockControlAddr(t, 3).String(), + MockControlAddr(t, 4).String(), + }, + SecondaryFabricContexts: []uint32{4, 6}, + Addr: MockControlAddr(t, 2), + State: MemberStateJoined, + FaultDomain: MustCreateFaultDomain(), + LastUpdate: time.Now(), + }, + }, + expGroupMap: &GroupMap{ + Version: 1, + RankEntries: map[Rank]RankEntry{ + 2: { + PrimaryURI: MockControlAddr(t, 2).String(), + NumPrimaryCtxs: 8, + SecondaryURIs: []string{ + MockControlAddr(t, 3).String(), + MockControlAddr(t, 4).String(), + }, + NumSecondaryCtxs: []uint32{4, 6}, + }, }, }, }, diff --git a/src/control/system/raft/testdata/raft_recovery/daos_system.db b/src/control/system/raft/testdata/raft_recovery/daos_system.db index 0b5d96d92d243d4885888b1202e511a9820c0a35..769eef4a1d1e561278dcd7a9253e31081ff87b51 100644 GIT binary patch literal 65536 zcmeI*ON<;x83*w3`u#|}NjM;KamF!l*s`mts~@#UD3)!3MF7jY=3vWNS9e!O6V|h< znb}~=Sc(-X65<)e35f&95aR>M0SO@`;0w1Pap961;9O85Bou*!BE(lSU9&Ump4}Y` zy94^SwL4S&+MeIn(=%1|b-7mSO}om+_uo16{*-$@-J7w$obMkFmQN0qj~^=^7k+W> z?N3a9$)fKiu0Zo3i{5TIiPh@*FiP3~qmyk_-mR zwCcfE?w(v;w$0q;Cp&kJ9(~~(*_prIzkS+t%sV@7-KftuuG;!+U2-Xz;7o{lDQA_Q 
z<;iiO;vf=HtS{DQ>R(*0&x)D4+pa!)qyBk&HMK4G%;Y&e)`g6ZbNVM|1?ODQ7VVDd zxLv=n)M(nAW)x;+v9pjbT{Vp+%|CJV`iYKhcTSXZX6h}|{BnIZnW;CLnQ1jmr?EsU z@|pU|N+Wj%8R#%kykIuXBBs(X*Cxz@4GHoegHy&s>YC|x^vd)~ILPTiirzHBLe-RaO! z5TCwx{yqiq-7<(zuMz zWxEk|FMqD%&Y4S`C6{UQQ!A~OZFXIT7o4*#xAuI)w)#7|BxdAgl6;orFN}uoz4BNk zIwAN9=R+0Fj|@BiJ^XOM`RE4DA(w%Q#rn>wC<2kmjD?{HnM!QVbeN_rR9dJY(bDE| zwX=ImRXJY@FS`ZX$Q*JJC1Fs_{741!qeINcjz9I0ahSD`TJ^6<)u(fu#yrULf)!y( zSBhM7x=7@bMTJR{C`d)7Y&EkS#Vog-S@S4NMx4!$RxlqPX1@6Q_Xe2j8=TEi6vkS_ z>z~b0Yzrf^l*M$7U@DXe)9!*?Bq|a~l#3!))y(cXU3oV1?PjJIE;OB9Ro9He70T1Y z%3u4;M+cOTZJ-=0p#!;rvQajZVH7b<*J8Rh$A+ajPnZaGF7h-Iap68*E3VBBzfw7v ziMHFBUOIid7y&(90sYu8^siqWylQL&odi5mnm&Vj-(*HyHF7#}1G;LYhQ~}5HlXhp znPhY(r$MBPLKN;QTmc;=!K_FNS)|p_(J1I`KcDG^OP$DS=Ep0TpBQ5P_>*7%>cE9# zL*_(8L8#Ve&P)*`+{R3pl+I^PAAp*^owKwMhUX>^r5W*M!_BU|aBTahpiTtWYQ)+6 zLheJeq&wqvl>@LsPAxc1Dg*XC^NMcZn>`toZhsYBkj+pqf` zdC}{U|IyjgYiHe=TPX_HJzIOQ8+u2#DUE(pH?(iUm>T@scYgcJgP)}q%RhCG;ogIL zpf2owN47uyox^+h$>)%1{NW-}JpTL6jqdXoL`!>y+BpW6P9C zDTq-VGoGiCn>a}`;W8&yQ%@!%h*B!aBiCuanIP`22;!chAl8q?(*r?#X`PNwmsu20 z_i&{9$r4)=#9(9)GZkm91Bb;Kr590(kzqQsg3)}L^Hc>|x;&Rj3t}K*O6}QX4EI#T z(7z$^2muH{00Izz00bbgp+LV<-0$(P)t;fPTz%fx*FRWOzyE2O|L^Pf&(Tf>>;JUr z-r;Z6>9>A9#DN#4dq+LVa|k%Y?LRFl!?n=owVvLWuR-SfN%1hxW6KZ z2Zn?Aw_CTz2x1humZq^<3m-m+@yHC^P%%5Hgiz9nN4RB;RtfB*y_009U<00Izz00bZa zfyok})cfu%k*vWH0k-U_>Ziz;&l!-W?kJ~Z3fn} ztJ&GemTifbA}&b4g##QAGQ^66asVRC$r2H#@Q~nuB9K5_BB2~}0!4f^)4iS<_hffL zp0%TYt!ApLy1Tmj_f=Qd_vqP1V~}=~`P*;qe|sh@9~@N7mi1M(r(D=y<{v8a>mMr) z{r*26{YSMOau5Ik5C8!X009sH0T2KI5C8!X0D%XGz_#*KCh7ls$4+%$S+4Z|50~X9 z4t?|Iw|BHktHIb#D1rb8fB*=900@8p2!H?xfB*=900?X;fxfSw&S0idSpXW1<5UiQ z{~wgbfj?0m%0Dd2m$r|V|4q9Nucu9Szx2w9llaO(El>oc=^haLxtovC?zB(Dav=+8v)$Md4N#dkgu}W!LUd{jtkekM;aY?^szg z*X+8^SDGy~*KBt(*X_7odxWljm1e86y0{ols$WH0^!$|`Rny_xonmRQ>!Mrf zEvzo*gDaT;Nn0L^bIlub&1b2J=ld7Y>#q76H(vSlm7edc&}FRL`N?UL!Ij-N=Y99` z%+2|$%bujuPqU*Ux*h*zNJJOc5mBOfES1@eh~)n*B0W(=X{_TYO$sK3$(Rz(F((Cm z`P@0K(^MDP^hK1Y#8{;_D5KdL8SNRB(ObWIdgC&Rc({?coUV+_L>b9AmOhF-%cL<( 
zSxw?e(u|2Dj-n(=3Rg`3HX=yWbv7)cJvB1gJ0hd_gRgyI6EYH98zH8AAH@@8yMk6WG7mYwmMx!$WEMO+t?Q>i3Llw^-QH{ z&ghdfoutMHXKXH#Etq5{N|_BSLi=ha+51OiWZ$^`I?3o0rOoHadl(eNQ5jju6TxjI zBeKR+HlOE~mKM*RJ$;HyBTA5(Pii7dRZePt7DddGV43Em_q(Ww9S`bB*y9nO-n4o#)J6pxsZd zcDugQH?)kdc+Pjj*7I%OU2EvPn3ESsU3rEqIcFyua{5B=RK z=*sIjKT^Z_(NWHy{PpY*=O^F0KP%27Da1PcS!%;gZWI&2o~UG$v&0AUV(hsmvtyPU zQ_opW;C!CXnF~P#Ti2oK(xnl}1kjJxfIct^`qG#Fx*5>1=25(6<*M({+DEx9Qq823 zmZ?Y(=v*d@d#3~^1Fkf-9&{K_>N+%mmRk-w4nsCwdR>n`Py_nlDCqCM_M;)tA6chI z3$0`#*R1mOpy?LVG%OqH!jhhCQd#DrT#%73=c&w$Gb#+lH6yf|1UlMk%tA-h$Kra- z2Wv1tHj4ST^Vf$kuk#?JL4cchopD1amRu51Mi0I$m{Jomr508*pKHZcS}5fwbZ0$@ zvYHcH&6%!T2ocQXH4nnaYB)bW%K4plj&Fi<#H~rfC^c|0)pzIIy7#;jmsZ zTNE^I6nSL0=LxN@@6KV|t{XQ7?w>6kpXthlQ8XQ{LUqHRtPMX%m4=`n`B64NcQ$AEfnY0`v5STK7 z%IiPVAf>%b7by?=cdsnJQI=a}`Q5QHCp|rEU&RajrnDDShc4 zd-;s-zUaFv-~GnRC&nx?1&!Rws)C$`LNgwbrbuBKRwrT3cQ*=R5ae*5 z<0G`DnSIczHxeJQV@O10FKO%?5s{gF_ZOQG5gBsiv?EyLWC@v^QxFSh(uY}jOsc%| z6e!`9EwmFNu~|^urz{~u(Fw8MsEBsfh-lZSh)!I6Ye+=vg|Nh0DJ|*n)4YqKi89J0 zMX;F6G9wdmxLXtE^b{Qx8x>O&3;Evd7RZR?6Dbi?%#GYeyJ}=q-A>3s00ck)1V8`; zKmY_D2m)(LabJTUoYGe*3;KK&@Be`@|Nr8c|Nr$d|Nj$X{{I)p{QtkB#zXs6{J@iA z{(o8pfB$`ce;^X@_ru>`##O-IPeV=v{{H_dfCc{kDMbn?4uAg?0-5-1sJML`}@Pg5D5f8;Qdda^7?OU4BiCbbjd+CuN>7+m1Qyk^wVMb4(qG;2Ktqa zM&%hNO~0S2p9w6B$Up!DKmY_l00ck)1V8`;KmY_l00g#@K&7y6R{yW!2Fi1X{=bz% z#IZpD1V8`;KmY_l00ck)1V8`;KwupL!IvM>4E==^6#2vS3I6}@(I=FDT9(5<_oH8| zFDR=`Gec1UA&wvf0aWcrv>g6t-~sxC^eOs;a#&vmK85$}sT9&o_}#P@mU{lA>*Qm< z08p(O-h6TC(n@8?7_@+I5C8!X009sH0T2KI5C8!X009sHfx85%U;RH=M*LTC{ndK` z^lE-TA)x;*Gc17s2!H?xfB*=900@8p2!H?xfWX5>pwjOje*b@Ev3S@dfb#_b5C8!X z009sH0T2KI5C8!X0D=2Ypz`Zi{{QN|{wkWjir=r|1n&PC;RqlA0w4eaAOHd&00JNY z0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4ea zAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAn^Vp F@L!>qeNq4b diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1664895530778/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1664895530778/state.bin deleted file 
mode 100644 index e5908f65f19..00000000000 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1664895530778/state.bin +++ /dev/null @@ -1 +0,0 @@ -{"Version":8,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"f3f74662-e143-484f-a715-038cf1008590","2":"f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1","3":"c273b6c8-629b-486a-af22-7647eda4b981","4":"d35d2696-7a7f-44a9-b31f-31c1b9d1e22f","5":"b7573b9f-216c-41aa-a21a-aedaa05bb5fc","6":"2772e37e-c266-485a-a9bc-19733939faff","7":"a8122f20-576b-4a83-a72b-4cba4c4b99b3","8":"87ef4971-b58e-4bbd-bbdd-59b661a68d19"},"Uuids":{"2772e37e-c266-485a-a9bc-19733939faff":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"2772e37e-c266-485a-a9bc-19733939faff","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.230275745Z"},"87ef4971-b58e-4bbd-bbdd-59b661a68d19":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"87ef4971-b58e-4bbd-bbdd-59b661a68d19","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.231944645Z"},"a8122f20-576b-4a83-a72b-4cba4c4b99b3":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarnation":0,"uuid":"a8122f20-576b-4a83-a72b-4cba4c4b99b3","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.231042679Z"},"b7573b9f-216c-41aa-a21a-aedaa05bb5fc":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"b7573b9f-216c-41aa-a21a-aedaa05bb5fc","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.229496845Z"},"c273b6c8-629b-486a-af22-7647eda4b981":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"c273b6c8-629b-486a-af22-7647eda4b981"
,"fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.227533412Z"},"d35d2696-7a7f-44a9-b31f-31c1b9d1e22f":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"d35d2696-7a7f-44a9-b31f-31c1b9d1e22f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.22858071Z"},"f3f74662-e143-484f-a715-038cf1008590":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"f3f74662-e143-484f-a715-038cf1008590","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.22554103Z"},"f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.226600753Z"}},"Addrs":{"127.0.0.1:10001":["f3f74662-e143-484f-a715-038cf1008590","f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1","c273b6c8-629b-486a-af22-7647eda4b981","d35d2696-7a7f-44a9-b31f-31c1b9d1e22f"],"127.0.0.2:10001":["b7573b9f-216c-41aa-a21a-aedaa05bb5fc","2772e37e-c266-485a-a9bc-19733939faff","a8122f20-576b-4a83-a72b-4cba4c4b99b3","87ef4971-b58e-4bbd-bbdd-59b661a68d19"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,
"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{},"Uuids":{},"Labels":{}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1664895530778/meta.json b/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/meta.json similarity index 51% rename from src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1664895530778/meta.json rename to src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/meta.json index 10dc40fca8c..8c0a130d409 100644 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1664895530778/meta.json +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/meta.json @@ -1 +1 @@ -{"Version":1,"ID":"2-11-1664895530778","Index":11,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":4177,"CRC":"1fxOXbl6VZs="} +{"Version":1,"ID":"2-11-1665528548388","Index":11,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":4441,"CRC":"tIArpKQ32y0="} diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/state.bin new file mode 100644 index 00000000000..15ec7c14c58 --- /dev/null +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-11-1665528548388/state.bin @@ -0,0 +1 @@ 
+{"Version":8,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"8a5818a9-4759-4762-95cc-71a43278e70d","2":"a111e09b-407e-41b2-9677-0db20a78bc1f","3":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","4":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","5":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","6":"fe344a6f-7255-42af-b75d-126e6d862285","7":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","8":"2395840f-ebc6-4b71-807c-9dcf644e0623"},"Uuids":{"2395840f-ebc6-4b71-807c-9dcf644e0623":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"2395840f-ebc6-4b71-807c-9dcf644e0623","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.147351335Z"},"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14418846Z"},"8a5818a9-4759-4762-95cc-71a43278e70d":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"8a5818a9-4759-4762-95cc-71a43278e70d","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.141166341Z"},"a111e09b-407e-41b2-9677-0db20a78bc1f":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"a111e09b-407e-41b2-9677-0db20a78bc1f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.142242063Z"},"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fabric_uri":"","secondary_fabr
ic_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145014303Z"},"c47c9082-7c0f-40fa-9ce1-c472d0b43920":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14317539Z"},"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarnation":0,"uuid":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.146615014Z"},"fe344a6f-7255-42af-b75d-126e6d862285":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"fe344a6f-7255-42af-b75d-126e6d862285","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145799472Z"}},"Addrs":{"127.0.0.1:10001":["8a5818a9-4759-4762-95cc-71a43278e70d","a111e09b-407e-41b2-9677-0db20a78bc1f","c47c9082-7c0f-40fa-9ce1-c472d0b43920","44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0"],"127.0.0.2:10001":["bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fe344a6f-7255-42af-b75d-126e6d862285","d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","2395840f-ebc6-4b71-807c-9dcf644e0623"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","test",
"domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{},"Uuids":{},"Labels":{}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1664895532148/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1664895532148/state.bin deleted file mode 100644 index 995936b8bde..00000000000 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1664895532148/state.bin +++ /dev/null @@ -1 +0,0 @@ -{"Version":16,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"f3f74662-e143-484f-a715-038cf1008590","2":"f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1","3":"c273b6c8-629b-486a-af22-7647eda4b981","4":"d35d2696-7a7f-44a9-b31f-31c1b9d1e22f","5":"b7573b9f-216c-41aa-a21a-aedaa05bb5fc","6":"2772e37e-c266-485a-a9bc-19733939faff","7":"a8122f20-576b-4a83-a72b-4cba4c4b99b3","8":"87ef4971-b58e-4bbd-bbdd-59b661a68d19"},"Uuids":{"2772e37e-c266-485a-a9bc-19733939faff":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"2772e37e-c266-485a-a9bc-19733939faff","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.230275745Z"},"87ef4971-b58e-4bbd-bbdd-59b661a68d19":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"87ef4971-b58e-4bbd-bbdd-59b661a68d19","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.231944645Z"},"a8122f20-576b-4a83-a72b-4cba4c4b99b3":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarn
ation":0,"uuid":"a8122f20-576b-4a83-a72b-4cba4c4b99b3","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.231042679Z"},"b7573b9f-216c-41aa-a21a-aedaa05bb5fc":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"b7573b9f-216c-41aa-a21a-aedaa05bb5fc","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.229496845Z"},"c273b6c8-629b-486a-af22-7647eda4b981":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"c273b6c8-629b-486a-af22-7647eda4b981","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.227533412Z"},"d35d2696-7a7f-44a9-b31f-31c1b9d1e22f":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"d35d2696-7a7f-44a9-b31f-31c1b9d1e22f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.22858071Z"},"f3f74662-e143-484f-a715-038cf1008590":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"f3f74662-e143-484f-a715-038cf1008590","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.22554103Z"},"f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"info":"","last_update":"2022-10-04T14:58:50.226600753Z"}},"Addrs":{"127.0.0.1:10001":["f3f74662-e143-484f-a715-038cf1008590","f6d035d0-2e2e-47d4-ae6c-e48f5a1198c1","c273b6c8-629b-486a-af22-7647eda4b981","d35d2696-7a7f-44a9-b31f-31c1b9d1e22f"],"127.0.0.2:10001":["b7573b9f-216c-41aa-a21a-aedaa05bb5fc","2772e37e-c266-485a-a9bc-19733939fa
ff","a8122f20-576b-4a83-a72b-4cba4c4b99b3","87ef4971-b58e-4bbd-bbdd-59b661a68d19"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{"0":["25606d64-131f-423d-9ea9-b7e0ed35cd66","5e3d8fb5-2a28-431d-9d29-0ea410e22268"],"1":["5985d23c-50c6-489a-af85-ed5404bf44e8"],"2":["5e3d8fb5-2a28-431d-9d29-0ea410e22268"],"3":["1205c336-b72e-4248-a3d1-19d0b2c6a640"],"4":["5985d23c-50c6-489a-af85-ed5404bf44e8"],"6":["25606d64-131f-423d-9ea9-b7e0ed35cd66"]},"Uuids":{"1205c336-b72e-4248-a3d1-19d0b2c6a640":{"PoolUUID":"1205c336-b72e-4248-a3d1-19d0b2c6a640","PoolLabel":"pool0004","State":1,"Replicas":[3],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.241537507Z"},"25606d64-131f-423d-9ea9-b7e0ed35cd66":{"PoolUUID":"25606d64-131f-423d-9ea9-b7e0ed35cd66","PoolLabel":"pool0002","State":1,"Replicas":[0,6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.240026196Z"},"41573aff-b4dc-4c33-8724-50216a3f7a03":{"PoolUUID":"41573aff-b4dc-4c33-8724-50216a3f7a03","PoolLabel":"pool0000","State":1,"Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRan
kStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.238197993Z"},"5985d23c-50c6-489a-af85-ed5404bf44e8":{"PoolUUID":"5985d23c-50c6-489a-af85-ed5404bf44e8","PoolLabel":"pool0005","State":1,"Replicas":[4,1],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.242301115Z"},"5e3d8fb5-2a28-431d-9d29-0ea410e22268":{"PoolUUID":"5e3d8fb5-2a28-431d-9d29-0ea410e22268","PoolLabel":"pool0003","State":1,"Replicas":[0,2],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.240744642Z"},"9c6c58b9-14af-4f42-b3d4-b0f36d8cf82a":{"PoolUUID":"9c6c58b9-14af-4f42-b3d4-b0f36d8cf82a","PoolLabel":"pool0001","State":1,"Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.239212036Z"},"cac28b7e-8c3d-4e43-9bff-00137ef44803":{"PoolUUID":"cac28b7e-8c3d-4e43-9bff-00137ef44803","PoolLabel":"pool0007","State":1,"Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.243843228Z"},"dad5588a-222c-48e2-8ff9-0fd360e09d55":{"PoolUUID":"dad5588a-222c-48e2-8ff9-0fd360e09d55","PoolLabel":"pool0006","State":1,"Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-04T14:58:51.243081644Z"}},"Labels":{"pool0000":"41573aff-b4dc-4c33-8724-50216a3f7a03","pool0001":"9c6c58b9-14af-4f42-b3d4-b0f36d8cf82a","pool0002":"25606d64-131f-423d-9ea9-b7e0ed35cd66","pool0003":"5e3d8fb5-2a28-431d-9d29-0ea410e22268","pool0004":"1205c336-b72e-4248-a3d1-19d0b2c6a640","pool0005":"5985d23c-50c6-489a-af85-ed5404bf44e8","pool0006":"dad5588a-222c-48e2-8ff9-0fd360e09d55","pool0007":"cac28b7e-8c3d-4e43-9bff-00137ef44803"}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git 
a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1664895532148/meta.json b/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/meta.json similarity index 51% rename from src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1664895532148/meta.json rename to src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/meta.json index a6e6edbeda9..d6e2fccb8d4 100644 --- a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1664895532148/meta.json +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/meta.json @@ -1 +1 @@ -{"Version":1,"ID":"2-19-1664895532148","Index":19,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":7094,"CRC":"G/tg0yhXFKE="} +{"Version":1,"ID":"2-19-1665528549936","Index":19,"Term":2,"Peers":"ka8xMjcuMC4wLjE6MTAwMDE=","Configuration":{"Servers":[{"Suffrage":0,"ID":"127.0.0.1:10001","Address":"127.0.0.1:10001"}]},"ConfigurationIndex":1,"Size":7424,"CRC":"WB3vb8m2U3w="} diff --git a/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/state.bin b/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/state.bin new file mode 100644 index 00000000000..adcad90e3ef --- /dev/null +++ b/src/control/system/raft/testdata/raft_recovery/snapshots/2-19-1665528549936/state.bin @@ -0,0 +1 @@ 
+{"Version":16,"NextRank":9,"MapVersion":8,"Members":{"Ranks":{"1":"8a5818a9-4759-4762-95cc-71a43278e70d","2":"a111e09b-407e-41b2-9677-0db20a78bc1f","3":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","4":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","5":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","6":"fe344a6f-7255-42af-b75d-126e6d862285","7":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","8":"2395840f-ebc6-4b71-807c-9dcf644e0623"},"Uuids":{"2395840f-ebc6-4b71-807c-9dcf644e0623":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":8,"incarnation":0,"uuid":"2395840f-ebc6-4b71-807c-9dcf644e0623","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.147351335Z"},"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":4,"incarnation":0,"uuid":"44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14418846Z"},"8a5818a9-4759-4762-95cc-71a43278e70d":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":1,"incarnation":0,"uuid":"8a5818a9-4759-4762-95cc-71a43278e70d","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.141166341Z"},"a111e09b-407e-41b2-9677-0db20a78bc1f":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":2,"incarnation":0,"uuid":"a111e09b-407e-41b2-9677-0db20a78bc1f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.142242063Z"},"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":5,"incarnation":0,"uuid":"bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fabric_uri":"","secondary_fab
ric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145014303Z"},"c47c9082-7c0f-40fa-9ce1-c472d0b43920":{"addr":"127.0.0.1:10001","state":"joined","fault_domain":"/my/test/domain","rank":3,"incarnation":0,"uuid":"c47c9082-7c0f-40fa-9ce1-c472d0b43920","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.14317539Z"},"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":7,"incarnation":0,"uuid":"d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.146615014Z"},"fe344a6f-7255-42af-b75d-126e6d862285":{"addr":"127.0.0.2:10001","state":"joined","fault_domain":"/my/test/domain","rank":6,"incarnation":0,"uuid":"fe344a6f-7255-42af-b75d-126e6d862285","fabric_uri":"","secondary_fabric_uris":null,"fabric_contexts":0,"secondary_fabric_contexts":null,"info":"","last_update":"2022-10-11T22:49:08.145799472Z"}},"Addrs":{"127.0.0.1:10001":["8a5818a9-4759-4762-95cc-71a43278e70d","a111e09b-407e-41b2-9677-0db20a78bc1f","c47c9082-7c0f-40fa-9ce1-c472d0b43920","44b4cd58-73f6-40b0-86d7-d4b1ed2fd8c0"],"127.0.0.2:10001":["bf95cedc-19f8-4de4-a1ce-a93aa3eebedf","fe344a6f-7255-42af-b75d-126e6d862285","d844bfa0-9d9c-4b31-9ff0-37c20ef17d7f","2395840f-ebc6-4b71-807c-9dcf644e0623"]},"FaultDomains":{"Domain":{"Domains":null},"ID":1,"Children":[{"Domain":{"Domains":["my"]},"ID":2,"Children":[{"Domain":{"Domains":["my","test"]},"ID":3,"Children":[{"Domain":{"Domains":["my","test","domain"]},"ID":4,"Children":[{"Domain":{"Domains":["my","test","domain","rank1"]},"ID":5,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank2"]},"ID":6,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank3"]},"ID":7,"Children":[]},{"Domain":{"Domains":["my","test"
,"domain","rank4"]},"ID":8,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank5"]},"ID":9,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank6"]},"ID":10,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank7"]},"ID":11,"Children":[]},{"Domain":{"Domains":["my","test","domain","rank8"]},"ID":12,"Children":[]}]}]}]}]}},"Pools":{"Ranks":{"0":["67efa2cb-7fca-4528-907c-f846186d1fd4"],"2":["7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e","caf380e7-1ab7-40d2-9100-bf1a0dad52a7"],"3":["a4ec2566-9a5f-447a-bd08-159d1db617fd","7df843df-ba07-4fe3-80c2-86db369ff1f1"],"5":["4ff31c2c-5515-48ed-95bb-549143892ed7"],"6":["a4ec2566-9a5f-447a-bd08-159d1db617fd","7df843df-ba07-4fe3-80c2-86db369ff1f1","678fa845-ce21-47ef-b802-df491cfbf2fb"]},"Uuids":{"155ebf05-dc82-449c-ae62-4da5fcac783f":{"PoolUUID":"155ebf05-dc82-449c-ae62-4da5fcac783f","PoolLabel":"pool0000","State":1,"Replicas":null,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.154074898Z"},"4ff31c2c-5515-48ed-95bb-549143892ed7":{"PoolUUID":"4ff31c2c-5515-48ed-95bb-549143892ed7","PoolLabel":"pool0001","State":1,"Replicas":[5],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.155016853Z"},"678fa845-ce21-47ef-b802-df491cfbf2fb":{"PoolUUID":"678fa845-ce21-47ef-b802-df491cfbf2fb","PoolLabel":"pool0006","State":1,"Replicas":[6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.159082982Z"},"67efa2cb-7fca-4528-907c-f846186d1fd4":{"PoolUUID":"67efa2cb-7fca-4528-907c-f846186d1fd4","PoolLabel":"pool0003","State":1,"Replicas":[0],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.15665199Z"},"7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e":{"PoolUUID":"7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e","PoolLabel":"pool0002","State":1,"Replicas":[2]
,"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.155906588Z"},"7df843df-ba07-4fe3-80c2-86db369ff1f1":{"PoolUUID":"7df843df-ba07-4fe3-80c2-86db369ff1f1","PoolLabel":"pool0005","State":1,"Replicas":[3,6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.158300971Z"},"a4ec2566-9a5f-447a-bd08-159d1db617fd":{"PoolUUID":"a4ec2566-9a5f-447a-bd08-159d1db617fd","PoolLabel":"pool0004","State":1,"Replicas":[3,6],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.157413619Z"},"caf380e7-1ab7-40d2-9100-bf1a0dad52a7":{"PoolUUID":"caf380e7-1ab7-40d2-9100-bf1a0dad52a7","PoolLabel":"pool0007","State":1,"Replicas":[2],"Storage":{"CreationRankStr":"[0-8]","CurrentRankStr":"[0-8]","PerRankTierStorage":[1,2]},"LastUpdate":"2022-10-11T22:49:09.159810941Z"}},"Labels":{"pool0000":"155ebf05-dc82-449c-ae62-4da5fcac783f","pool0001":"4ff31c2c-5515-48ed-95bb-549143892ed7","pool0002":"7b703ddf-f5b6-4d90-9dd2-6fa8863b1c4e","pool0003":"67efa2cb-7fca-4528-907c-f846186d1fd4","pool0004":"a4ec2566-9a5f-447a-bd08-159d1db617fd","pool0005":"7df843df-ba07-4fe3-80c2-86db369ff1f1","pool0006":"678fa845-ce21-47ef-b802-df491cfbf2fb","pool0007":"caf380e7-1ab7-40d2-9100-bf1a0dad52a7"}},"System":{"Attributes":{}},"SchemaVersion":0} \ No newline at end of file diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index 4a19dbf0412..b2eef414bab 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -1421,7 +1421,7 @@ const ProtobufCMessageDescriptor mgmt__client_net_hint__descriptor = (ProtobufCMessageInit) mgmt__client_net_hint__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__field_descriptors[3] = +static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__field_descriptors[4] = { { "rank", @@ 
-1459,8 +1459,21 @@ static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__fiel 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "num_ctxs", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp__RankUri, num_ctxs), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__get_attach_info_resp__rank_uri__field_indices_by_name[] = { + 3, /* field[3] = num_ctxs */ 2, /* field[2] = provider_idx */ 0, /* field[0] = rank */ 1, /* field[1] = uri */ @@ -1468,7 +1481,7 @@ static const unsigned mgmt__get_attach_info_resp__rank_uri__field_indices_by_nam static const ProtobufCIntRange mgmt__get_attach_info_resp__rank_uri__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 3 } + { 0, 4 } }; const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__rank_uri__descriptor = { @@ -1478,7 +1491,7 @@ const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__rank_uri__descripto "Mgmt__GetAttachInfoResp__RankUri", "mgmt", sizeof(Mgmt__GetAttachInfoResp__RankUri), - 3, + 4, mgmt__get_attach_info_resp__rank_uri__field_descriptors, mgmt__get_attach_info_resp__rank_uri__field_indices_by_name, 1, mgmt__get_attach_info_resp__rank_uri__number_ranges, diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index a2704d3d151..cf862c4e20c 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -286,10 +286,11 @@ struct _Mgmt__GetAttachInfoResp__RankUri uint32_t rank; char *uri; uint32_t provider_idx; + uint32_t num_ctxs; }; #define MGMT__GET_ATTACH_INFO_RESP__RANK_URI__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__get_attach_info_resp__rank_uri__descriptor) \ - , 0, (char *)protobuf_c_empty_string, 0 } + , 0, (char *)protobuf_c_empty_string, 0, 0 } struct _Mgmt__GetAttachInfoResp diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index b48659b296f..acc8e1efa74 100644 --- a/src/proto/mgmt/svc.proto +++ 
b/src/proto/mgmt/svc.proto @@ -91,6 +91,7 @@ message GetAttachInfoResp { uint32 rank = 1; string uri = 2; uint32 provider_idx = 3; + uint32 num_ctxs = 4; } repeated RankUri rank_uris = 2; // Rank URIs for the primary provider // These CaRT settings are shared with the From afaa547e9fa1e40f156f4b8a8a5e71d96d19450f Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Thu, 17 Nov 2022 22:57:21 -0800 Subject: [PATCH 22/28] DAOS-12108 cart: Fix segfault over secondary provider (#10860) - daos tool crashed when running over a secondary provider This issue was caused by wrong structure being used on the client end. Signed-off-by: Alexander A Oganezov --- src/cart/crt_group.c | 11 +---------- src/cart/crt_init.c | 4 ++++ src/cart/crt_internal_types.h | 6 ++++++ src/cart/crt_rpc.c | 12 +++++------- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/cart/crt_group.c b/src/cart/crt_group.c index 2ded80ba87b..2aeec5dc90b 100644 --- a/src/cart/crt_group.c +++ b/src/cart/crt_group.c @@ -1852,8 +1852,6 @@ crt_group_config_path_set(const char *path) int crt_nr_secondary_remote_tags_set(int idx, int num_tags) { - struct crt_prov_gdata *prov_data; - D_DEBUG(DB_ALL, "secondary_idx=%d num_tags=%d\n", idx, num_tags); if (idx != 0) { @@ -1861,19 +1859,12 @@ crt_nr_secondary_remote_tags_set(int idx, int num_tags) return -DER_NONEXIST; } - if ((crt_gdata.cg_prov_gdata_secondary == NULL) || - (idx >= crt_gdata.cg_num_secondary_provs)) { - D_ERROR("Secondary providers not initialized\n"); - return -DER_NONEXIST; - } - if (num_tags <= 0) { D_ERROR("Invalid number of tags: %d\n", num_tags); return -DER_INVAL; } - prov_data = &crt_gdata.cg_prov_gdata_secondary[idx]; - prov_data->cpg_num_remote_tags = num_tags; + crt_gdata.cg_num_remote_tags = num_tags; return DER_SUCCESS; } diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index bac29b3f6f8..5cd4af85549 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -199,6 +199,10 @@ static int data_init(int server, 
crt_init_options_t *opt) crt_gdata.cg_inited = 0; crt_gdata.cg_primary_prov = CRT_PROV_OFI_SOCKETS; + /* By default set number of secondary remote tags to 1 */ + crt_gdata.cg_num_remote_tags = 1; + crt_gdata.cg_last_remote_tag = 0; + d_srand(d_timeus_secdiff(0) + getpid()); start_rpcid = ((uint64_t)d_rand()) << 32; diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 0adbf26ddb3..e5360dd1494 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -115,6 +115,12 @@ struct crt_gdata { ATOMIC uint64_t cg_rpcid; /* rpc id */ + /** Last remote tag sent */ + uint32_t cg_last_remote_tag; + + /** Number of remote tags */ + uint32_t cg_num_remote_tags; + /* protects crt_gdata */ pthread_rwlock_t cg_rwlock; diff --git a/src/cart/crt_rpc.c b/src/cart/crt_rpc.c index c96b49c547d..6ef159e200a 100644 --- a/src/cart/crt_rpc.c +++ b/src/cart/crt_rpc.c @@ -1107,7 +1107,6 @@ crt_req_ep_lc_lookup(struct crt_rpc_priv *rpc_priv, bool *uri_exists) crt_phy_addr_t uri = NULL; int rc = 0; crt_phy_addr_t base_addr = NULL; - struct crt_prov_gdata *prov_data; int dst_tag; req = &rpc_priv->crp_pub; @@ -1122,13 +1121,12 @@ crt_req_ep_lc_lookup(struct crt_rpc_priv *rpc_priv, bool *uri_exists) /* For a secondary provider round-robin between all available remote contexts */ if (!crt_gdata.cg_provider_is_primary) { - prov_data = &crt_gdata.cg_prov_gdata_secondary[0]; + D_RWLOCK_WRLOCK(&crt_gdata.cg_rwlock); + crt_gdata.cg_last_remote_tag++; + crt_gdata.cg_last_remote_tag %= crt_gdata.cg_num_remote_tags; - D_MUTEX_LOCK(&prov_data->cpg_mutex); - prov_data->cpg_last_remote_tag++; - prov_data->cpg_last_remote_tag %= prov_data->cpg_num_remote_tags; - dst_tag = prov_data->cpg_last_remote_tag; - D_MUTEX_UNLOCK(&prov_data->cpg_mutex); + dst_tag = crt_gdata.cg_last_remote_tag; + D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); } crt_grp_lc_lookup(grp_priv, ctx->cc_idx, From 8207c130e8da461d330e9ffc57d2a02e9c02ebc2 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: 
Sat, 3 Dec 2022 08:52:27 +0800 Subject: [PATCH 23/28] DAOS-12111 object: fix race in obj_bulk_inflights() (#10949) When bulk transfer is over secondary provider, the bulk completion is called on secondary xstream and the bulk transfer waiter is on primary xstream, so in the obj_bulk_inflights() (which is called in the bulk completion), we shouldn't access the completion argument (which could have been freed by the waiter) after wakeup waiter. Signed-off-by: Niu Yawei --- src/object/srv_obj.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 4a3c7635486..74144748f09 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -334,11 +334,12 @@ obj_bulk_inflights(struct obj_bulk_args *args, crt_rpc_t *rpc, int val) D_ASSERT(args->bulks_inflight > 0); args->bulks_inflight += val; - if (args->bulks_inflight == 0) - ABT_eventual_set(args->eventual, &args->result, sizeof(args->result)); if (!is_primary) ABT_mutex_unlock(args->lock); + + if (args->bulks_inflight == 0) + ABT_eventual_set(args->eventual, &args->result, sizeof(args->result)); } static int @@ -383,6 +384,13 @@ bulk_cp(const struct crt_bulk_cb_info *cb_info) static inline int cached_bulk_cp(const struct crt_bulk_cb_info *cb_info) { + struct crt_bulk_desc *bulk_desc = cb_info->bci_bulk_desc; + crt_rpc_t *rpc = bulk_desc->bd_rpc; + bool is_primary; + + rpc2orig_ctx(rpc, &is_primary); + D_ASSERT(is_primary); + return obj_bulk_comp_cb(cb_info); } From 07e10f66a4d6e2a94877de6cf930ed48ca1bfae6 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Fri, 14 Apr 2023 09:48:32 -0600 Subject: [PATCH 24/28] DAOS-13088 control: Fix provider query with multiprovider (#11936) * DAOS-13088 control: Fix provider query with multiprovider Some additional work was necessary to make specified provider query work with multiprovider. * DAOS-13153 control: Use libfabric method to alloc hint Allocating the hint manually wasn't working for some providers. 
Signed-off-by: Kris Jacque --------- Signed-off-by: Kris Jacque --- src/control/cmd/daos_server/storage_utils.go | 14 +++++++++-- .../lib/hardware/libfabric/bindings.go | 23 +++++++++---------- .../lib/hardware/libfabric/provider.go | 2 +- src/control/server/server.go | 7 +++++- src/control/server/server_utils.go | 21 ++++++++++++++++- 5 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/control/cmd/daos_server/storage_utils.go b/src/control/cmd/daos_server/storage_utils.go index 35694e82f6f..81648112bf4 100644 --- a/src/control/cmd/daos_server/storage_utils.go +++ b/src/control/cmd/daos_server/storage_utils.go @@ -100,7 +100,12 @@ type scmCmd struct { func genFiAffFn(fis *hardware.FabricInterfaceSet) config.EngineAffinityFn { return func(l logging.Logger, e *engine.Config) (uint, error) { - fi, err := fis.GetInterfaceOnNetDevice(e.Fabric.Interface, e.Fabric.Provider) + prov, err := e.Fabric.GetPrimaryProvider() + if err != nil { + return 0, errors.Wrap(err, "getting primary provider") + } + + fi, err := fis.GetInterfaceOnNetDevice(e.Fabric.Interface, prov) if err != nil { return 0, err } @@ -111,7 +116,12 @@ func genFiAffFn(fis *hardware.FabricInterfaceSet) config.EngineAffinityFn { func getAffinitySource(log logging.Logger, cfg *config.Server) (config.EngineAffinityFn, error) { scanner := hwprov.DefaultFabricScanner(log) - fiSet, err := scanner.Scan(context.Background(), cfg.Fabric.Provider) + provs, err := cfg.Fabric.GetProviders() + if err != nil { + return nil, errors.Wrap(err, "getting configured providers") + } + + fiSet, err := scanner.Scan(context.Background(), provs...) 
if err != nil { return nil, errors.Wrap(err, "scan fabric") } diff --git a/src/control/lib/hardware/libfabric/bindings.go b/src/control/lib/hardware/libfabric/bindings.go index 5e49f417a01..65c6a3ac5a2 100644 --- a/src/control/lib/hardware/libfabric/bindings.go +++ b/src/control/lib/hardware/libfabric/bindings.go @@ -89,6 +89,7 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/lib/dlopen" + "github.com/daos-stack/daos/src/control/logging" ) // Load dynamically loads the libfabric library and provides a method to unload it. @@ -143,7 +144,7 @@ func (f *fiInfo) hfiUnit() (uint, error) { // fiGetInfo fetches the list of fi_info structs with the desired provider (if non-empty), or all of // them otherwise. It also returns the cleanup function to free the fi_info. -func fiGetInfo(hdl *dlopen.LibHandle, prov string) ([]*fiInfo, func() error, error) { +func fiGetInfo(log logging.Logger, hdl *dlopen.LibHandle, prov string) ([]*fiInfo, func() error, error) { getInfoPtr, err := getLibFuncPtr(hdl, "fi_getinfo") if err != nil { return nil, nil, err @@ -151,20 +152,18 @@ func fiGetInfo(hdl *dlopen.LibHandle, prov string) ([]*fiInfo, func() error, err var hint *C.struct_fi_info if len(prov) != 0 { - hint = (*C.struct_fi_info)(C.calloc(C.ulong(unsafe.Sizeof(C.struct_fi_info{})), 1)) - if hint == nil { - return nil, nil, errors.New("unable to allocate hint") + var cleanupHint func() error + hint, cleanupHint, err = fiAllocInfo(hdl) + if err != nil { + return nil, nil, errors.Wrap(err, "allocating fi_info hint") } - defer C.free(unsafe.Pointer(hint)) - - hint.fabric_attr = (*C.struct_fi_fabric_attr)(C.calloc(C.ulong(unsafe.Sizeof(C.struct_fi_fabric_attr{})), 1)) - if hint.fabric_attr == nil { - return nil, nil, errors.New("unable to allocate fabric attributes for hint") - } - defer C.free(unsafe.Pointer(hint.fabric_attr)) + defer func() { + if cleanupErr := cleanupHint(); cleanupErr != nil && err != nil { + log.Errorf("failed to clean up fi_info hint: %s", 
err.Error()) + } + }() hint.fabric_attr.prov_name = C.CString(prov) - defer C.free(unsafe.Pointer(hint.fabric_attr.prov_name)) } var fi *C.struct_fi_info diff --git a/src/control/lib/hardware/libfabric/provider.go b/src/control/lib/hardware/libfabric/provider.go index 69da794d7d7..7ab16f2ed6c 100644 --- a/src/control/lib/hardware/libfabric/provider.go +++ b/src/control/lib/hardware/libfabric/provider.go @@ -55,7 +55,7 @@ func (p *Provider) getFabricInterfaces(provider string, ch chan *fabricResult) { } defer hdl.Close() - fiInfo, cleanup, err := fiGetInfo(hdl, extProviderToLibFabric(provider)) + fiInfo, cleanup, err := fiGetInfo(p.log, hdl, extProviderToLibFabric(provider)) if err != nil { ch <- &fabricResult{ err: err, diff --git a/src/control/server/server.go b/src/control/server/server.go index 435e7602734..3fc06fd98fc 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -577,6 +577,11 @@ func Start(log logging.Logger, cfg *config.Server) error { ctx, shutdown := context.WithCancel(context.Background()) defer shutdown() + providers, err := cfg.Fabric.GetProviders() + if err != nil { + return err + } + hwprovFini, err := hwprov.Init(log) if err != nil { return err @@ -589,7 +594,7 @@ func Start(log logging.Logger, cfg *config.Server) error { scanner := hwprov.DefaultFabricScanner(log) - fiSet, err := scanner.Scan(ctx, cfg.Fabric.Provider) + fiSet, err := scanner.Scan(ctx, providers...) 
if err != nil { return errors.Wrap(err, "scan fabric") } diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index 815b5b39b01..faea3e13917 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -215,8 +215,27 @@ func getFabricNetDevClass(cfg *config.Server, fis *hardware.FabricInterfaceSet) return nil, err } + provs, err := engine.Fabric.GetProviders() + if err != nil { + return nil, err + } + + if len(provs) == 1 { + for i := range cfgIfaces { + if i == 0 { + continue + } + provs = append(provs, provs[0]) + } + } + + if len(cfgIfaces) != len(provs) { + return nil, fmt.Errorf("number of ifaces (%d) and providers (%d) not equal", + len(cfgIfaces), len(provs)) + } + for i, cfgIface := range cfgIfaces { - fi, err := fis.GetInterfaceOnNetDevice(cfgIface, engine.Fabric.Provider) + fi, err := fis.GetInterfaceOnNetDevice(cfgIface, provs[i]) if err != nil { return nil, err } From 0f2e0463640c449cbf84f1aafaa10723cabc4d8c Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 19 Apr 2023 03:27:39 +0800 Subject: [PATCH 25/28] DAOS-13134 pool: use primary context to do bcast (#11923) Server to server communication has to go through primary context since servers are not aware of each other's secondary addresses. This patch changed bcast code to use the primary context stored in dss_module_info->dmi_ctx instead of rpc->cr_ctx which could be secondary context if the RPC is from secondary provider. 
Signed-off-by: Niu Yawei --- src/container/srv_container.c | 38 ++++++++++++++------------------ src/container/srv_epoch.c | 2 +- src/include/daos_srv/container.h | 2 +- src/pool/srv_pool.c | 17 +++++++------- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index ebe3c521f2b..b1216f061b5 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1446,12 +1446,10 @@ find_hdls_by_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) } static int cont_close_hdls(struct cont_svc *svc, - struct cont_tgt_close_rec *recs, int nrecs, - crt_context_t ctx); + struct cont_tgt_close_rec *recs, int nrecs); static int -evict_hdls(struct rdb_tx *tx, struct cont *cont, bool force, struct ds_pool_hdl *pool_hdl, - crt_context_t ctx) +evict_hdls(struct rdb_tx *tx, struct cont *cont, bool force, struct ds_pool_hdl *pool_hdl) { struct find_hdls_by_cont_arg arg; int rc; @@ -1477,7 +1475,7 @@ evict_hdls(struct rdb_tx *tx, struct cont *cont, bool force, struct ds_pool_hdl goto out; } - rc = cont_close_hdls(cont->c_svc, arg.fha_buf.rb_recs, arg.fha_buf.rb_nrecs, ctx); + rc = cont_close_hdls(cont->c_svc, arg.fha_buf.rb_recs, arg.fha_buf.rb_nrecs); out: recs_buf_fini(&arg.fha_buf); @@ -1528,11 +1526,11 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, D_GOTO(out_prop, rc = -DER_NO_PERM); } - rc = evict_hdls(tx, cont, in->cdi_force, NULL /* pool_hdl */, rpc->cr_ctx); + rc = evict_hdls(tx, cont, in->cdi_force, NULL /* pool_hdl */); if (rc != 0) goto out_prop; - rc = cont_destroy_bcast(rpc->cr_ctx, cont->c_svc, cont->c_uuid); + rc = cont_destroy_bcast(dss_get_module_info()->dmi_ctx, cont->c_svc, cont->c_uuid); if (rc != 0) goto out_prop; @@ -2206,8 +2204,7 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, if (in->coi_flags & (DAOS_COO_EVICT | DAOS_COO_EVICT_ALL)) { rc = evict_hdls(tx, cont, true /* force */, - (in->coi_flags & 
DAOS_COO_EVICT_ALL) ? NULL : pool_hdl, - rpc->cr_ctx); + (in->coi_flags & DAOS_COO_EVICT_ALL) ? NULL : pool_hdl); if (rc != 0) { daos_prop_free(prop); goto out; @@ -2370,8 +2367,7 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, } static int -cont_close_recs(crt_context_t ctx, struct cont_svc *svc, - struct cont_tgt_close_rec recs[], int nrecs) +cont_close_recs(struct cont_svc *svc, struct cont_tgt_close_rec recs[], int nrecs) { int i; int rc = 0; @@ -2411,7 +2407,7 @@ cont_close_recs(crt_context_t ctx, struct cont_svc *svc, static int cont_close_one_hdl(struct rdb_tx *tx, struct d_hash_table *nhc, struct cont_svc *svc, - crt_context_t ctx, const uuid_t uuid) + const uuid_t uuid) { d_iov_t key; d_iov_t value; @@ -2448,8 +2444,7 @@ cont_close_one_hdl(struct rdb_tx *tx, struct d_hash_table *nhc, struct cont_svc /* Close an array of handles, possibly belonging to different containers. */ static int -cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, - int nrecs, crt_context_t ctx) +cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, int nrecs) { struct rdb_tx tx; struct d_hash_table txs_nhc; /* TX per-container number of handles cache (HT). 
*/ @@ -2463,7 +2458,7 @@ cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, " recs[0].hce="DF_U64"\n", DP_CONT(svc->cs_pool_uuid, NULL), nrecs, DP_UUID(recs[0].tcr_hdl), recs[0].tcr_hce); - rc = cont_close_recs(ctx, svc, recs, nrecs); + rc = cont_close_recs(svc, recs, nrecs); if (rc != 0) { D_ERROR(DF_CONT": failed to close %d recs: "DF_RC"\n", DP_CONT(svc->cs_pool_uuid, NULL), nrecs, DP_RC(rc)); @@ -2485,7 +2480,7 @@ cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, DP_CONT(svc->cs_pool_uuid, NULL), num_tx); for (i = 0; i < nrecs; i++) { - rc = cont_close_one_hdl(&tx, &txs_nhc, svc, ctx, recs[i].tcr_hdl); + rc = cont_close_one_hdl(&tx, &txs_nhc, svc, recs[i].tcr_hdl); if (rc != 0) { D_ERROR(DF_CONT": failed to close handle: "DF_UUID", "DF_RC"\n", DP_CONT(svc->cs_pool_uuid, NULL), DP_UUID(recs[i].tcr_hdl), @@ -2588,11 +2583,11 @@ cont_close(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, DP_CONT(cont->c_svc->cs_pool_uuid, in->cci_op.ci_uuid), DP_UUID(rec.tcr_hdl), rec.tcr_hce); - rc = cont_close_recs(rpc->cr_ctx, cont->c_svc, &rec, 1 /* nrecs */); + rc = cont_close_recs(cont->c_svc, &rec, 1 /* nrecs */); if (rc != 0) D_GOTO(out, rc); - rc = cont_close_one_hdl(tx, NULL /* nhc */, cont->c_svc, rpc->cr_ctx, rec.tcr_hdl); + rc = cont_close_one_hdl(tx, NULL /* nhc */, cont->c_svc, rec.tcr_hdl); /* On success update modify time (except if open specified read-only metadata stats) */ if (rc == 0 && !(chdl.ch_flags & DAOS_COO_RO_MDSTATS)) @@ -3241,7 +3236,7 @@ cont_query(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, if (in->cqi_bits & DAOS_CO_QUERY_TGT) { /* need RF if user query cont_info */ in->cqi_bits |= (DAOS_CO_QUERY_PROP_REDUN_FAC | DAOS_CO_QUERY_PROP_REDUN_LVL); - rc = cont_query_bcast(rpc->cr_ctx, cont, in->cqi_op.ci_pool_hdl, + rc = cont_query_bcast(dss_get_module_info()->dmi_ctx, cont, in->cqi_op.ci_pool_hdl, in->cqi_op.ci_hdl, out); if (rc) return rc; @@ -3900,8 +3895,7 @@ 
close_iter_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) * and managed by local container services. */ int -ds_cont_close_by_pool_hdls(uuid_t pool_uuid, uuid_t *pool_hdls, int n_pool_hdls, - crt_context_t ctx) +ds_cont_close_by_pool_hdls(uuid_t pool_uuid, uuid_t *pool_hdls, int n_pool_hdls) { struct cont_svc *svc; struct rdb_tx tx; @@ -3939,7 +3933,7 @@ ds_cont_close_by_pool_hdls(uuid_t pool_uuid, uuid_t *pool_hdls, int n_pool_hdls, if (arg.cia_buf.rb_nrecs > 0) rc = cont_close_hdls(svc, arg.cia_buf.rb_recs, - arg.cia_buf.rb_nrecs, ctx); + arg.cia_buf.rb_nrecs); out_buf: recs_buf_fini(&arg.cia_buf); diff --git a/src/container/srv_epoch.c b/src/container/srv_epoch.c index d5c113cade7..c36fdfb45c6 100644 --- a/src/container/srv_epoch.c +++ b/src/container/srv_epoch.c @@ -218,7 +218,7 @@ ds_cont_snap_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, } rc = snap_create_bcast(tx, cont, in->cei_op.ci_hdl, in->cei_opts, - rpc->cr_ctx, &snap_eph); + dss_get_module_info()->dmi_ctx, &snap_eph); if (rc == 0) out->ceo_epoch = snap_eph; out: diff --git a/src/include/daos_srv/container.h b/src/include/daos_srv/container.h index 96addb42bfa..737e04f1e2d 100644 --- a/src/include/daos_srv/container.h +++ b/src/include/daos_srv/container.h @@ -178,7 +178,7 @@ void ds_cont_hdl_put(struct ds_cont_hdl *hdl); void ds_cont_hdl_get(struct ds_cont_hdl *hdl); int ds_cont_close_by_pool_hdls(uuid_t pool_uuid, uuid_t *pool_hdls, - int n_pool_hdls, crt_context_t ctx); + int n_pool_hdls); int ds_cont_local_close(uuid_t cont_hdl_uuid); int ds_cont_child_start_all(struct ds_pool_child *pool_child); diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 9457d3485bd..e99162a8ab1 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -2909,7 +2909,7 @@ ds_pool_connect_handler(crt_rpc_t *rpc, int handler_version) d_tm_inc_counter(metrics->connect_total, 1); if (in->pci_query_bits & DAOS_PO_QUERY_SPACE) - rc = pool_space_query_bcast(rpc->cr_ctx, svc, in->pci_op.pi_hdl, 
+ rc = pool_space_query_bcast(dss_get_module_info()->dmi_ctx, svc, in->pci_op.pi_hdl, &out->pco_space); out_map_version: out->pco_op.po_map_version = ds_pool_get_version(svc->ps_pool); @@ -3011,8 +3011,7 @@ pool_disconnect_hdls(struct rdb_tx *tx, struct pool_svc *svc, uuid_t *hdl_uuids, * TODO: Send POOL_TGT_CLOSE_CONTS and somehow retry until every * container service has responded (through ds_pool). */ - rc = ds_cont_close_by_pool_hdls(svc->ps_uuid, hdl_uuids, n_hdl_uuids, - ctx); + rc = ds_cont_close_by_pool_hdls(svc->ps_uuid, hdl_uuids, n_hdl_uuids); if (rc != 0) D_GOTO(out, rc); @@ -3082,7 +3081,7 @@ ds_pool_disconnect_handler(crt_rpc_t *rpc) } rc = pool_disconnect_hdls(&tx, svc, &pdi->pdi_op.pi_hdl, - 1 /* n_hdl_uuids */, rpc->cr_ctx); + 1 /* n_hdl_uuids */, dss_get_module_info()->dmi_ctx); if (rc != 0) D_GOTO(out_lock, rc); @@ -3759,7 +3758,7 @@ ds_pool_query_handler(crt_rpc_t *rpc, int version) /* See comment above, rebuild doesn't connect the pool */ if ((in->pqi_query_bits & DAOS_PO_QUERY_SPACE) && !is_pool_from_srv(in->pqi_op.pi_uuid, in->pqi_op.pi_hdl)) { - rc = pool_space_query_bcast(rpc->cr_ctx, svc, in->pqi_op.pi_hdl, + rc = pool_space_query_bcast(dss_get_module_info()->dmi_ctx, svc, in->pqi_op.pi_hdl, &out->pqo_space); if (unlikely(rc)) goto out_svc; @@ -3904,7 +3903,7 @@ ds_pool_query_info_handler(crt_rpc_t *rpc) ABT_rwlock_unlock(svc->ps_pool->sp_lock); if (tgt_state == PO_COMP_ST_UPIN) { - rc = pool_query_tgt_space(rpc->cr_ctx, svc, in->pqii_op.pi_hdl, + rc = pool_query_tgt_space(dss_get_module_info()->dmi_ctx, svc, in->pqii_op.pi_hdl, in->pqii_rank, in->pqii_tgt, &out->pqio_space); if (rc) @@ -4579,7 +4578,7 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc, if (n_hdl_uuids > 0) { rc = pool_disconnect_hdls(tx, svc, hdl_uuids, n_hdl_uuids, - rpc->cr_ctx); + dss_get_module_info()->dmi_ctx); if (rc != 0) D_GOTO(out_free, rc); need_commit = true; @@ -6247,7 +6246,7 @@ ds_pool_update_handler(crt_rpc_t *rpc) list.pta_addrs = 
in->pti_addr_list.ca_arrays; if (opc_get(rpc->cr_opc) == POOL_REINT) { - rc = pool_discard(rpc->cr_ctx, svc, &list); + rc = pool_discard(dss_get_module_info()->dmi_ctx, svc, &list); if (rc) goto out_svc; } @@ -6514,7 +6513,7 @@ ds_pool_evict_handler(crt_rpc_t *rpc) } else { /* Pool evict, or pool destroy with force=true */ rc = pool_disconnect_hdls(&tx, svc, hdl_uuids, - n_hdl_uuids, rpc->cr_ctx); + n_hdl_uuids, dss_get_module_info()->dmi_ctx); if (rc != 0) { D_GOTO(out_free, rc); } else { From 9839c64c3b6ef543ec7ebe56dcc305ca1d4324cc Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Fri, 2 Jun 2023 11:29:50 -0600 Subject: [PATCH 26/28] DAOS-13539 control: Fix network related commands for multiprovider (#12255) - By default dmg network scan will include all configured providers, not just the primary provider. - In daos_agent dump-attachinfo, use the agent config as the source for the provider index. Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/attachinfo.go | 9 +++++-- src/control/server/ctl_network_rpc.go | 28 ++++++++++------------ src/control/server/ctl_network_rpc_test.go | 3 +-- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/control/cmd/daos_agent/attachinfo.go b/src/control/cmd/daos_agent/attachinfo.go index 65d6499a8f9..69aa3db3dcf 100644 --- a/src/control/cmd/daos_agent/attachinfo.go +++ b/src/control/cmd/daos_agent/attachinfo.go @@ -23,7 +23,7 @@ type dumpAttachInfoCmd struct { ctlInvokerCmd Output string `short:"o" long:"output" default:"stdout" description:"Dump output to this location"` JSON bool `short:"j" long:"json" description:"Enable JSON output"` - ProviderIdx uint `short:"n" long:"provider_idx" description:"Index of provider to fetch (if multiple)"` + ProviderIdx *uint `short:"n" long:"provider_idx" description:"Index of provider to fetch (if multiple)"` } func (cmd *dumpAttachInfoCmd) Execute(_ []string) error { @@ -57,7 +57,12 @@ func (cmd *dumpAttachInfoCmd) Execute(_ []string) error { return err } - ranks, err 
:= getServiceRanksForProviderIdx(resp, int(cmd.ProviderIdx)) + providerIdx := cmd.cfg.ProviderIdx + if cmd.ProviderIdx != nil { + providerIdx = *cmd.ProviderIdx + } + + ranks, err := getServiceRanksForProviderIdx(resp, int(providerIdx)) if err != nil { return err } diff --git a/src/control/server/ctl_network_rpc.go b/src/control/server/ctl_network_rpc.go index 321fa685da8..ffef50421f3 100644 --- a/src/control/server/ctl_network_rpc.go +++ b/src/control/server/ctl_network_rpc.go @@ -18,16 +18,16 @@ import ( // NetworkScan retrieves details of network interfaces on remote hosts. func (c *ControlService) NetworkScan(ctx context.Context, req *ctlpb.NetworkScanReq) (*ctlpb.NetworkScanResp, error) { - provider, err := c.srvCfg.Fabric.GetPrimaryProvider() + providers, err := c.srvCfg.Fabric.GetProviders() if err != nil { return nil, err } switch { case strings.EqualFold(req.GetProvider(), "all"): - provider = "" + providers = []string{} case req.GetProvider() != "": - provider = req.GetProvider() + providers = []string{req.GetProvider()} } topo, err := hwprov.DefaultTopologyProvider(c.log).GetTopology(ctx) @@ -39,12 +39,12 @@ func (c *ControlService) NetworkScan(ctx context.Context, req *ctlpb.NetworkScan return nil, err } - result, err := c.fabric.Scan(ctx, provider) + result, err := c.fabric.Scan(ctx, providers...) 
if err != nil { return nil, err } - resp := c.fabricInterfaceSetToNetworkScanResp(result, provider) + resp := c.fabricInterfaceSetToNetworkScanResp(result) resp.Numacount = int32(topo.NumNUMANodes()) resp.Corespernuma = int32(topo.NumCoresPerNUMA()) @@ -52,7 +52,7 @@ func (c *ControlService) NetworkScan(ctx context.Context, req *ctlpb.NetworkScan return resp, nil } -func (c *ControlService) fabricInterfaceSetToNetworkScanResp(fis *hardware.FabricInterfaceSet, provider string) *ctlpb.NetworkScanResp { +func (c *ControlService) fabricInterfaceSetToNetworkScanResp(fis *hardware.FabricInterfaceSet) *ctlpb.NetworkScanResp { resp := new(ctlpb.NetworkScanResp) resp.Interfaces = make([]*ctlpb.FabricInterface, 0, fis.NumNetDevices()) for _, name := range fis.Names() { @@ -68,15 +68,13 @@ func (c *ControlService) fabricInterfaceSetToNetworkScanResp(fis *hardware.Fabri for _, hwFI := range fi.NetInterfaces.ToSlice() { for _, prov := range fi.Providers.ToSlice() { - if provider == "" || provider == prov.Name { - resp.Interfaces = append(resp.Interfaces, &ctlpb.FabricInterface{ - Provider: prov.Name, - Device: hwFI, - Numanode: uint32(fi.NUMANode), - Netdevclass: uint32(fi.DeviceClass), - Priority: uint32(prov.Priority), - }) - } + resp.Interfaces = append(resp.Interfaces, &ctlpb.FabricInterface{ + Provider: prov.Name, + Device: hwFI, + Numanode: uint32(fi.NUMANode), + Netdevclass: uint32(fi.DeviceClass), + Priority: uint32(prov.Priority), + }) } } } diff --git a/src/control/server/ctl_network_rpc_test.go b/src/control/server/ctl_network_rpc_test.go index 434933c4973..4a3a1641689 100644 --- a/src/control/server/ctl_network_rpc_test.go +++ b/src/control/server/ctl_network_rpc_test.go @@ -22,7 +22,6 @@ import ( func TestServer_ControlService_fabricInterfaceSetToNetworkScanResp(t *testing.T) { for name, tc := range map[string]struct { fis *hardware.FabricInterfaceSet - provider string expResult *ctlpb.NetworkScanResp }{ "empty": { @@ -156,7 +155,7 @@ func 
TestServer_ControlService_fabricInterfaceSetToNetworkScanResp(t *testing.T) cs := mockControlService(t, log, config.DefaultServer(), nil, nil, nil) - result := cs.fabricInterfaceSetToNetworkScanResp(tc.fis, tc.provider) + result := cs.fabricInterfaceSetToNetworkScanResp(tc.fis) if diff := cmp.Diff(tc.expResult, result, test.DefaultCmpOpts()...); diff != "" { t.Fatalf("(-want, +got)\n%s\n", diff) From 578aa3743b09fcb1cd9ae031601e301b4da1e718 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Wed, 14 Jun 2023 12:14:54 -0600 Subject: [PATCH 27/28] DAOS-7029 control: Add refresh methods for agent cache (#12370) * DAOS-7029 control: Add refresh methods for agent cache (#12150) * Add agent config option for a cache refresh interval. The agent will perform a lazy cache refresh when the configured interval elapses. * Allow users to force-refresh the caches with SIGUSR2. Signed-off-by: Kris Jacque * DAOS-7029 control: Quiet noisy fabric ready logging (#12298) - Move fabric ready check into InfoCache so it is only performed when needed. - Quiet some other noisy logging. 
Signed-off-by: Kris Jacque --- src/control/cmd/daos_agent/config.go | 19 +- src/control/cmd/daos_agent/config_test.go | 5 +- src/control/cmd/daos_agent/fabric.go | 2 +- src/control/cmd/daos_agent/fabric_test.go | 10 + src/control/cmd/daos_agent/infocache.go | 453 ++++- src/control/cmd/daos_agent/infocache_test.go | 1567 +++++++++++++----- src/control/cmd/daos_agent/mgmt_rpc.go | 118 +- src/control/cmd/daos_agent/mgmt_rpc_test.go | 834 +++------- src/control/cmd/daos_agent/start.go | 61 +- src/control/lib/cache/cache.go | 220 +++ src/control/lib/cache/cache_test.go | 563 +++++++ src/control/lib/hardware/fabric.go | 2 +- src/control/lib/hardware/fabric_test.go | 20 +- src/control/lib/hardware/mocks.go | 62 +- utils/config/daos_agent.yml | 6 + 15 files changed, 2683 insertions(+), 1259 deletions(-) create mode 100644 src/control/lib/cache/cache.go create mode 100644 src/control/lib/cache/cache_test.go diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index bab2717d7f8..d41b6e79d74 100644 --- a/src/control/cmd/daos_agent/config.go +++ b/src/control/cmd/daos_agent/config.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -9,6 +9,7 @@ package main import ( "fmt" "io/ioutil" + "time" "github.com/pkg/errors" "gopkg.in/yaml.v2" @@ -24,6 +25,21 @@ const ( defaultLogFile = "/tmp/daos_agent.log" ) +type refreshMinutes time.Duration + +func (rm *refreshMinutes) UnmarshalYAML(unmarshal func(interface{}) error) error { + var mins uint + if err := unmarshal(&mins); err != nil { + return err + } + *rm = refreshMinutes(time.Duration(mins) * time.Minute) + return nil +} + +func (rm refreshMinutes) Duration() time.Duration { + return time.Duration(rm) +} + // Config defines the agent configuration. 
type Config struct { SystemName string `yaml:"name"` @@ -34,6 +50,7 @@ type Config struct { LogLevel common.ControlLogLevel `yaml:"control_log_mask,omitempty"` TransportConfig *security.TransportConfig `yaml:"transport_config"` DisableCache bool `yaml:"disable_caching,omitempty"` + CacheExpiration refreshMinutes `yaml:"cache_expiration,omitempty"` DisableAutoEvict bool `yaml:"disable_auto_evict,omitempty"` ExcludeFabricIfaces common.StringSet `yaml:"exclude_fabric_ifaces,omitempty"` FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"` diff --git a/src/control/cmd/daos_agent/config_test.go b/src/control/cmd/daos_agent/config_test.go index 71a7b91cc92..4a46c3ef14b 100644 --- a/src/control/cmd/daos_agent/config_test.go +++ b/src/control/cmd/daos_agent/config_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2022 Intel Corporation. +// (C) Copyright 2021-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,6 +8,7 @@ package main import ( "testing" + "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -43,6 +44,7 @@ runtime_dir: /tmp/runtime log_file: /home/frodo/logfile control_log_mask: debug disable_caching: true +cache_expiration: 30 disable_auto_evict: true transport_config: allow_insecure: true @@ -129,6 +131,7 @@ transport_config: LogFile: "/home/frodo/logfile", LogLevel: common.ControlLogLevelDebug, DisableCache: true, + CacheExpiration: refreshMinutes(30 * time.Minute), DisableAutoEvict: true, TransportConfig: &security.TransportConfig{ AllowInsecure: true, diff --git a/src/control/cmd/daos_agent/fabric.go b/src/control/cmd/daos_agent/fabric.go index 8796443dca7..dd170f5cd6d 100644 --- a/src/control/cmd/daos_agent/fabric.go +++ b/src/control/cmd/daos_agent/fabric.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2023 Intel Corporation. +// (C) Copyright 2021-2022 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/cmd/daos_agent/fabric_test.go b/src/control/cmd/daos_agent/fabric_test.go index bfe0c4cc587..2edc155dc1d 100644 --- a/src/control/cmd/daos_agent/fabric_test.go +++ b/src/control/cmd/daos_agent/fabric_test.go @@ -22,6 +22,16 @@ import ( var fiCmpOpt = cmpopts.IgnoreUnexported(FabricInterface{}) +func testFabricProviderSet(prov ...string) *hardware.FabricProviderSet { + providers := []*hardware.FabricProvider{} + for _, p := range prov { + providers = append(providers, &hardware.FabricProvider{ + Name: p, + }) + } + return hardware.NewFabricProviderSet(providers...) +} + func TestAgent_NewNUMAFabric(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) diff --git a/src/control/cmd/daos_agent/infocache.go b/src/control/cmd/daos_agent/infocache.go index 3a0ab220bd3..831ba3e6829 100644 --- a/src/control/cmd/daos_agent/infocache.go +++ b/src/control/cmd/daos_agent/infocache.go @@ -8,176 +8,445 @@ package main import ( "context" + "net" + "strings" "sync" + "time" "github.com/pkg/errors" - "google.golang.org/protobuf/proto" - mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" + "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/lib/atm" + "github.com/daos-stack/daos/src/control/lib/cache" + "github.com/daos-stack/daos/src/control/lib/control" "github.com/daos-stack/daos/src/control/lib/hardware" + "github.com/daos-stack/daos/src/control/lib/hardware/hwprov" "github.com/daos-stack/daos/src/control/logging" ) -// NotCachedErr is the error returned when trying to fetch data that is not cached. 
-var NotCachedErr = errors.New("not cached") +const ( + attachInfoKey = "GetAttachInfo" + fabricKey = "NUMAFabric" +) + +type getAttachInfoFn func(ctx context.Context, rpcClient control.UnaryInvoker, req *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) +type fabricScanFn func(ctx context.Context, providers ...string) (*NUMAFabric, error) + +// NewInfoCache creates a new InfoCache with appropriate parameters set. +func NewInfoCache(ctx context.Context, log logging.Logger, client control.UnaryInvoker, cfg *Config) *InfoCache { + ic := &InfoCache{ + log: log, + ignoreIfaces: cfg.ExcludeFabricIfaces, + client: client, + cache: cache.NewItemCache(log), + getAttachInfo: control.GetAttachInfo, + fabricScan: getFabricScanFn(log, cfg, hwprov.DefaultFabricScanner(log)), + netIfaces: net.Interfaces, + devClassGetter: hwprov.DefaultNetDevClassProvider(log), + devStateGetter: hwprov.DefaultNetDevStateProvider(log), + } -func newAttachInfoCache(log logging.Logger, enabled bool) *attachInfoCache { - return &attachInfoCache{ - log: log, - enabled: atm.NewBool(enabled), + if cfg.DisableCache { + ic.DisableAttachInfoCache() + ic.DisableFabricCache() + return ic } + + ic.EnableAttachInfoCache(time.Duration(cfg.CacheExpiration)) + if len(cfg.FabricInterfaces) > 0 { + nf := NUMAFabricFromConfig(log, cfg.FabricInterfaces) + ic.EnableStaticFabricCache(ctx, nf) + } else { + ic.EnableFabricCache() + } + + return ic } -type attachInfoCache struct { - mutex sync.Mutex +func getFabricScanFn(log logging.Logger, cfg *Config, scanner *hardware.FabricScanner) fabricScanFn { + return func(ctx context.Context, provs ...string) (*NUMAFabric, error) { + fis, err := scanner.Scan(ctx, provs...) 
+ if err != nil { + return nil, err + } + return NUMAFabricFromScan(ctx, log, fis).WithIgnoredDevices(cfg.ExcludeFabricIfaces), nil + } +} - log logging.Logger - enabled atm.Bool - initialized atm.Bool +type cacheItem struct { + sync.Mutex + lastCached time.Time + refreshInterval time.Duration +} - // cached response from remote server - attachInfo *mgmtpb.GetAttachInfoResp +func (ci *cacheItem) isStale() bool { + if ci.refreshInterval == 0 { + return false + } + return ci.lastCached.Add(ci.refreshInterval).Before(time.Now()) } -func (c *attachInfoCache) isCached() bool { - return c.initialized.IsTrue() +func (ci *cacheItem) isCached() bool { + return !ci.lastCached.Equal(time.Time{}) } -func (c *attachInfoCache) isEnabled() bool { - return c.enabled.IsTrue() +type cachedAttachInfo struct { + cacheItem + fetch getAttachInfoFn + system string + rpcClient control.UnaryInvoker + lastResponse *control.GetAttachInfoResp } -func (c *attachInfoCache) getAttachInfoResp() (*mgmtpb.GetAttachInfoResp, error) { - if !c.isCached() { - return nil, NotCachedErr +func newCachedAttachInfo(refreshInterval time.Duration, system string, rpcClient control.UnaryInvoker, fetchFn getAttachInfoFn) *cachedAttachInfo { + return &cachedAttachInfo{ + cacheItem: cacheItem{ + refreshInterval: refreshInterval, + }, + fetch: fetchFn, + system: system, + rpcClient: rpcClient, } +} - aiCopy := proto.Clone(c.attachInfo) - return aiCopy.(*mgmtpb.GetAttachInfoResp), nil +func sysAttachInfoKey(sys string) string { + return attachInfoKey + "-" + sys } -type getAttachInfoFn func(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) +// Key returns the key for this system-specific instance of GetAttachInfo. 
+func (ci *cachedAttachInfo) Key() string { + if ci == nil { + return "" + } + if ci.system == "" { + return attachInfoKey + } + return sysAttachInfoKey(ci.system) +} -// Get is responsible for returning a GetAttachInfo response, either from the cache or from -// the remote server if the cache is disabled. -func (c *attachInfoCache) Get(ctx context.Context, numaNode int, sys string, getRemote getAttachInfoFn) (*mgmtpb.GetAttachInfoResp, error) { - c.mutex.Lock() - defer c.mutex.Unlock() +// NeedsRefresh checks whether the cached data needs to be refreshed. +func (ci *cachedAttachInfo) NeedsRefresh() bool { + if ci == nil { + return false + } + return !ci.isCached() || ci.isStale() +} - if c.isEnabled() && c.isCached() { - return c.getAttachInfoResp() +// Refresh contacts the remote management server and refreshes the GetAttachInfo cache. +func (ci *cachedAttachInfo) Refresh(ctx context.Context) error { + if ci == nil { + return errors.New("cachedAttachInfo is nil") } - attachInfo, err := getRemote(ctx, numaNode, sys) + ci.Lock() + defer ci.Unlock() + + req := &control.GetAttachInfoReq{System: ci.system, AllRanks: true} + resp, err := ci.fetch(ctx, ci.rpcClient, req) if err != nil { - return nil, err + return errors.Wrap(err, "refreshing cached attach info failed") } - if !c.isEnabled() { - return attachInfo, nil - } + ci.lastResponse = resp + ci.lastCached = time.Now() + return nil +} - c.attachInfo = attachInfo - c.initialized.SetTrue() +type cachedFabricInfo struct { + cacheItem + fetch fabricScanFn + lastResults *NUMAFabric +} - return c.getAttachInfoResp() +func newCachedFabricInfo(log logging.Logger, fetchFn fabricScanFn) *cachedFabricInfo { + return &cachedFabricInfo{ + fetch: fetchFn, + } } -func newLocalFabricCache(log logging.Logger, enabled bool) *localFabricCache { - return &localFabricCache{ - log: log, - localNUMAFabric: newNUMAFabric(log), - enabled: atm.NewBool(enabled), +// Key returns the cache key for the fabric information. 
+func (cfi *cachedFabricInfo) Key() string { + return fabricKey +} + +// NeedsRefresh indicates that the fabric information does not need to be refreshed unless it has +// never been populated. +func (cfi *cachedFabricInfo) NeedsRefresh() bool { + if cfi == nil { + return false } + return !cfi.isCached() } -type localFabricCache struct { - mutex sync.RWMutex +// Refresh scans the hardware for information about the fabric devices and caches the result. +func (cfi *cachedFabricInfo) Refresh(ctx context.Context) error { + if cfi == nil { + return errors.New("cachedFabricInfo is nil") + } + + cfi.Lock() + defer cfi.Unlock() + + results, err := cfi.fetch(ctx) + if err != nil { + return errors.Wrap(err, "refreshing cached fabric info") + } - log logging.Logger - enabled atm.Bool - initialized atm.Bool - cfg *Config + cfi.lastResults = results + cfi.lastCached = time.Now() + return nil +} - // cached fabric interfaces organized by NUMA affinity - localNUMAFabric *NUMAFabric +// InfoCache is a cache for the results of expensive operations needed by the agent. +type InfoCache struct { + log logging.Logger + cache *cache.ItemCache + fabricCacheDisabled atm.Bool + attachInfoCacheDisabled atm.Bool + + getAttachInfo getAttachInfoFn + fabricScan fabricScanFn + netIfaces func() ([]net.Interface, error) + devClassGetter hardware.NetDevClassProvider + devStateGetter hardware.NetDevStateProvider + + client control.UnaryInvoker + attachInfoRefresh time.Duration + providers common.StringSet + ignoreIfaces common.StringSet } -// WithConfig adds a config file for the cache to use. -func (c *localFabricCache) WithConfig(cfg *Config) *localFabricCache { - c.cfg = cfg - return c +// AddProvider adds a fabric provider to the scan list. +func (c *InfoCache) AddProvider(prov string) { + if c == nil || prov == "" { + return + } + if c.providers == nil { + c.providers = common.NewStringSet() + } + c.providers.Add(prov) } -// IsEnabled reports whether the cache is enabled. 
-func (c *localFabricCache) IsEnabled() bool { +// IsAttachInfoEnabled checks whether the GetAttachInfo cache is enabled. +func (c *InfoCache) IsAttachInfoCacheEnabled() bool { if c == nil { return false } + return !c.attachInfoCacheDisabled.Load() +} + +// DisableAttachInfoCache fully disables the attach info cache. +func (c *InfoCache) DisableAttachInfoCache() { + if c == nil { + return + } + c.attachInfoCacheDisabled.Store(true) +} - return c.enabled.IsTrue() +// EnableAttachInfoCache enables a refreshable GetAttachInfo cache. +func (c *InfoCache) EnableAttachInfoCache(interval time.Duration) { + if c == nil { + return + } + c.attachInfoRefresh = interval + c.attachInfoCacheDisabled.Store(false) } -// IsCached reports whether there is data in the cache. -func (c *localFabricCache) IsCached() bool { +// IsFabricCacheEnabled checks whether the NUMAFabric cache is enabled. +func (c *InfoCache) IsFabricCacheEnabled() bool { if c == nil { return false } + return !c.fabricCacheDisabled.Load() +} + +// DisableFabricCache fully disables the fabric device cache. +func (c *InfoCache) DisableFabricCache() { + if c == nil { + return + } + c.fabricCacheDisabled.Store(true) +} - return c.initialized.IsTrue() +// EnableFabricCache enables a refreshable local fabric cache. +func (c *InfoCache) EnableFabricCache() { + if c == nil { + return + } + c.fabricCacheDisabled.Store(false) } -// Cache caches the results of a fabric scan locally. -func (c *localFabricCache) CacheScan(ctx context.Context, scan *hardware.FabricInterfaceSet) { +// EnableStaticFabricCache sets up a fabric cache based on a static value that cannot be refreshed. 
+func (c *InfoCache) EnableStaticFabricCache(ctx context.Context, nf *NUMAFabric) { if c == nil { return } - c.mutex.Lock() - defer c.mutex.Unlock() + item := &cachedFabricInfo{ + cacheItem: cacheItem{ + lastCached: time.Now(), + }, + fetch: func(context.Context, ...string) (*NUMAFabric, error) { + return nf, nil + }, + lastResults: nf, + } + if err := c.cache.Set(item); err != nil { + c.log.Errorf("error setting static fabric cache: %v", err) + } + c.EnableFabricCache() +} + +// GetAttachInfo fetches the attach info from the cache, and refreshes if necessary. +func (c *InfoCache) GetAttachInfo(ctx context.Context, sys string) (*control.GetAttachInfoResp, error) { + if c == nil { + return nil, errors.New("InfoCache is nil") + } - scanResult := NUMAFabricFromScan(ctx, c.log, scan) - c.setCache(scanResult) + if !c.IsAttachInfoCacheEnabled() { + return c.getAttachInfoRemote(ctx, sys) + } + + // Use the default system if none is specified. + if sys == "" { + sys = build.DefaultSystemName + } + createItem := func() (cache.Item, error) { + c.log.Debugf("cache miss for %s", sysAttachInfoKey(sys)) + return newCachedAttachInfo(c.attachInfoRefresh, sys, c.client, c.getAttachInfo), nil + } + + item, release, err := c.cache.GetOrCreate(ctx, sysAttachInfoKey(sys), createItem) + defer release() + if err != nil { + return nil, errors.Wrap(err, "getting attach info from cache") + } + + cai, ok := item.(*cachedAttachInfo) + if !ok { + return nil, errors.Errorf("unexpected attach info data type %T", item) + } + + return cai.lastResponse, nil } -// Cache initializes the cache with a specific NUMAFabric. 
-func (c *localFabricCache) Cache(ctx context.Context, nf *NUMAFabric) { - if c == nil || nf == nil { - return +func (c *InfoCache) getAttachInfoRemote(ctx context.Context, sys string) (*control.GetAttachInfoResp, error) { + c.log.Debug("GetAttachInfo not cached, fetching directly from MS") + // Ask the MS for _all_ info, regardless of pbReq.AllRanks, so that the + // cache can serve future "pbReq.AllRanks == true" requests. + req := new(control.GetAttachInfoReq) + req.SetSystem(sys) + req.AllRanks = true + resp, err := c.getAttachInfo(ctx, c.client, req) + if err != nil { + return nil, errors.Wrapf(err, "GetAttachInfo %+v", req) } - c.mutex.Lock() - defer c.mutex.Unlock() + if resp.ClientNetHint.Provider == "" { + return nil, errors.New("GetAttachInfo response contained no provider") + } + return resp, nil +} + +// GetFabricDevice returns an appropriate fabric device from the cache based on the requested parameters, +// and refreshes the cache if necessary. +func (c *InfoCache) GetFabricDevice(ctx context.Context, params *FabricIfaceParams) (*FabricInterface, error) { + if c == nil { + return nil, errors.New("InfoCache is nil") + } + nf, err := c.getNUMAFabric(ctx, params.DevClass, params.Provider) + if err != nil { + return nil, err + } - c.setCache(nf) + if params.Interface != "" { + fi, err := nf.FindDevice(params) + if err != nil { + return nil, err + } + return fi[0], nil + } + return nf.GetDevice(params) } -func (c *localFabricCache) setCache(nf *NUMAFabric) { - if !c.IsEnabled() { - return +func (c *InfoCache) getNUMAFabric(ctx context.Context, netDevClass hardware.NetDevClass, providers ...string) (*NUMAFabric, error) { + if !c.IsFabricCacheEnabled() { + c.log.Debug("NUMAFabric not cached, rescanning") + if err := c.waitFabricReady(ctx, netDevClass); err != nil { + return nil, err + } + return c.fabricScan(ctx, providers...) 
} - if c.cfg == nil { - c.localNUMAFabric = nf - } else { - c.localNUMAFabric = nf.WithIgnoredDevices(c.cfg.ExcludeFabricIfaces) + createItem := func() (cache.Item, error) { + c.log.Debug("NUMAFabric cache miss") + if err := c.waitFabricReady(ctx, netDevClass); err != nil { + return nil, err + } + return newCachedFabricInfo(c.log, c.fabricScan), nil + } + + item, release, err := c.cache.GetOrCreate(ctx, fabricKey, createItem) + defer release() + if err != nil { + return nil, errors.Wrap(err, "getting fabric scan from cache") + } + + cfi, ok := item.(*cachedFabricInfo) + if !ok { + return nil, errors.Errorf("unexpected fabric data type %T", item) + } + + return cfi.lastResults, nil +} + +func (c *InfoCache) waitFabricReady(ctx context.Context, netDevClass hardware.NetDevClass) error { + ifaces, err := c.netIfaces() + if err != nil { + return errors.Wrap(err, "getting net interfaces") + } + + var needIfaces []string + for _, iface := range ifaces { + devClass, err := c.devClassGetter.GetNetDevClass(iface.Name) + if err != nil { + return errors.Wrapf(err, "getting device class for %s", iface.Name) + } + if devClass == netDevClass { + needIfaces = append(needIfaces, iface.Name) + } } - c.initialized.SetTrue() + return hardware.WaitFabricReady(ctx, c.log, hardware.WaitFabricReadyParams{ + StateProvider: c.devStateGetter, + FabricIfaces: needIfaces, + IgnoreUnusable: true, + IterationSleep: time.Second, + }) } -// GetDevices fetches an appropriate fabric device from the cache. -func (c *localFabricCache) GetDevice(params *FabricIfaceParams) (*FabricInterface, error) { +// Refresh forces any enabled, refreshable caches to re-fetch their content immediately. 
+func (c *InfoCache) Refresh(ctx context.Context) error { if c == nil { - return nil, NotCachedErr + return errors.New("InfoCache is nil") } - c.mutex.RLock() - defer c.mutex.RUnlock() + if !c.IsAttachInfoCacheEnabled() && !c.IsFabricCacheEnabled() { + return errors.New("all caches are disabled") + } - if !c.IsCached() { - return nil, NotCachedErr + keys := []string{} + if c.IsFabricCacheEnabled() && c.cache.Has(fabricKey) { + keys = append(keys, fabricKey) + } + if c.IsAttachInfoCacheEnabled() { + for _, k := range c.cache.Keys() { + if strings.HasPrefix(k, attachInfoKey) { + keys = append(keys, k) + } + } } - return c.localNUMAFabric.GetDevice(params) + c.log.Debugf("refreshing cache keys: %+v", keys) + return c.cache.Refresh(ctx, keys...) } diff --git a/src/control/cmd/daos_agent/infocache_test.go b/src/control/cmd/daos_agent/infocache_test.go index 80d8b223bd3..c1f88bfb01e 100644 --- a/src/control/cmd/daos_agent/infocache_test.go +++ b/src/control/cmd/daos_agent/infocache_test.go @@ -8,560 +8,1363 @@ package main import ( "context" + "net" "testing" + "time" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/pkg/errors" - + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" - mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" - "github.com/daos-stack/daos/src/control/lib/atm" + "github.com/daos-stack/daos/src/control/lib/cache" + "github.com/daos-stack/daos/src/control/lib/control" "github.com/daos-stack/daos/src/control/lib/hardware" "github.com/daos-stack/daos/src/control/logging" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" ) -func testFabricProviderSet(prov ...string) *hardware.FabricProviderSet { - providers := []*hardware.FabricProvider{} - for _, p := range prov { - providers = append(providers, &hardware.FabricProvider{ - Name: p, +type 
testInfoCacheParams struct { + mockGetAttachInfo getAttachInfoFn + mockScanFabric fabricScanFn + mockNetIfaces func() ([]net.Interface, error) + mockNetDevClassGetter hardware.NetDevClassProvider + mockNetDevStateGetter hardware.NetDevStateProvider + disableFabricCache bool + disableAttachInfoCache bool + ctlInvoker control.Invoker + cachedItems []cache.Item +} + +func newTestInfoCache(t *testing.T, log logging.Logger, params testInfoCacheParams) *InfoCache { + c := cache.NewItemCache(log) + for _, item := range params.cachedItems { + c.Set(item) + } + + ic := &InfoCache{ + log: log, + getAttachInfo: params.mockGetAttachInfo, + fabricScan: params.mockScanFabric, + devClassGetter: params.mockNetDevClassGetter, + devStateGetter: params.mockNetDevStateGetter, + netIfaces: params.mockNetIfaces, + client: params.ctlInvoker, + cache: c, + } + + if ic.netIfaces == nil { + ic.netIfaces = func() ([]net.Interface, error) { + return []net.Interface{ + {Name: "test0"}, + {Name: "test1"}, + }, nil + } + } + + if ic.devClassGetter == nil { + ic.devClassGetter = &hardware.MockNetDevClassProvider{ + GetNetDevClassReturn: []hardware.MockGetNetDevClassResult{ + { + NDC: hardware.Ether, + }, + }, + } + } + + if ic.devStateGetter == nil { + ic.devStateGetter = &hardware.MockNetDevStateProvider{ + GetStateReturn: []hardware.MockNetDevStateResult{ + { + State: hardware.NetDevStateReady, + }, + }, + } + } + + if params.disableAttachInfoCache { + ic.DisableAttachInfoCache() + } else { + ic.EnableAttachInfoCache(0) + } + if params.disableFabricCache { + ic.DisableFabricCache() + } else { + ic.EnableFabricCache() + } + return ic +} + +func TestAgent_newCachedAttachInfo(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + expSys := "my_system" + expRefreshInterval := time.Second + expClient := control.NewMockInvoker(log, &control.MockInvokerConfig{}) + + ai := newCachedAttachInfo(expRefreshInterval, expSys, expClient, + func(ctx 
context.Context, rpcClient control.UnaryInvoker, req *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, nil }) + + test.AssertEqual(t, expSys, ai.system, "") + test.AssertEqual(t, expRefreshInterval, ai.refreshInterval, "") + test.AssertEqual(t, expClient, ai.rpcClient, "") + test.AssertEqual(t, time.Time{}, ai.lastCached, "") + if ai.lastResponse != nil { + t.Fatalf("expected nothing cached, found:\n%+v", ai.lastResponse) + } + if ai.fetch == nil { + t.Fatalf("expected refresh function to be non-nil") } - return hardware.NewFabricProviderSet(providers...) } -func TestAgent_newAttachInfoCache(t *testing.T) { +func TestAgent_cachedAttachInfo_Key(t *testing.T) { for name, tc := range map[string]struct { - enabled bool + ai *cachedAttachInfo + expResult string }{ - "enabled": { - enabled: true, + "nil": {}, + "no system name": { + ai: newCachedAttachInfo(0, "", nil, nil), + expResult: "GetAttachInfo", }, - "disabled": { - enabled: false, + "system name": { + ai: newCachedAttachInfo(0, "my_system", nil, nil), + expResult: "GetAttachInfo-my_system", + }, + } { + t.Run(name, func(t *testing.T) { + test.AssertEqual(t, tc.expResult, tc.ai.Key(), "") + }) + } +} + +func TestAgent_cachedAttachInfo_NeedsRefresh(t *testing.T) { + for name, tc := range map[string]struct { + ai *cachedAttachInfo + expResult bool + }{ + "nil": {}, + "never cached": { + ai: newCachedAttachInfo(0, "test", nil, nil), + expResult: true, + }, + "no refresh": { + ai: &cachedAttachInfo{ + cacheItem: cacheItem{ + lastCached: time.Now().Add(-time.Minute), + }, + lastResponse: &control.GetAttachInfoResp{}, + }, + }, + "expired": { + ai: &cachedAttachInfo{ + cacheItem: cacheItem{ + lastCached: time.Now().Add(-time.Minute), + refreshInterval: time.Second, + }, + lastResponse: &control.GetAttachInfoResp{}, + }, + expResult: true, + }, + "not expired": { + ai: &cachedAttachInfo{ + cacheItem: cacheItem{ + lastCached: time.Now().Add(-time.Second), + refreshInterval: time.Minute, + }, 
+ lastResponse: &control.GetAttachInfoResp{}, + }, + }, + } { + t.Run(name, func(t *testing.T) { + test.AssertEqual(t, tc.expResult, tc.ai.NeedsRefresh(), "") + }) + } +} + +func TestAgent_cachedAttachInfo_Refresh(t *testing.T) { + resp1 := &control.GetAttachInfoResp{ + System: "resp1", + ServiceRanks: []*control.PrimaryServiceRank{ + { + Rank: 1, + Uri: "rank one", + }, + { + Rank: 2, + Uri: "rank two", + }, + }, + MSRanks: []uint32{0, 1, 2}, + ClientNetHint: control.ClientNetworkHint{ + Provider: "prov", + NetDevClass: uint32(hardware.Ether), + }, + } + + resp2 := &control.GetAttachInfoResp{ + System: "resp2", + ServiceRanks: []*control.PrimaryServiceRank{ + { + Rank: 3, + Uri: "rank three", + }, + { + Rank: 4, + Uri: "rank four", + }, + }, + MSRanks: []uint32{1, 3}, + ClientNetHint: control.ClientNetworkHint{ + Provider: "other", + NetDevClass: uint32(hardware.Infiniband), + }, + } + + for name, tc := range map[string]struct { + nilCache bool + ctlResult *control.GetAttachInfoResp + ctlErr error + alreadyCached *control.GetAttachInfoResp + expErr error + expCached *control.GetAttachInfoResp + }{ + "nil": { + nilCache: true, + expErr: errors.New("nil"), + }, + "GetAttachInfo fails": { + ctlErr: errors.New("mock GetAttachInfo"), + expErr: errors.New("mock GetAttachInfo"), + }, + "not initialized": { + ctlResult: resp1, + expCached: resp1, + }, + "previously cached": { + ctlResult: resp2, + alreadyCached: resp1, + expCached: resp2, + }, + } { + t.Run(name, func(t *testing.T) { + var ai *cachedAttachInfo + if !tc.nilCache { + ai = newCachedAttachInfo(0, "test", control.DefaultClient(), + func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return tc.ctlResult, tc.ctlErr + }) + ai.lastResponse = tc.alreadyCached + if ai.lastResponse != nil { + ai.lastCached = time.Now() + } + } + + err := ai.Refresh(test.Context(t)) + + test.CmpErr(t, tc.expErr, err) + + if ai == nil { + return + } + + if diff := 
cmp.Diff(tc.expCached, ai.lastResponse); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + }) + } +} + +func TestAgent_newCachedFabricInfo(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + cfi := newCachedFabricInfo(log, func(ctx context.Context, providers ...string) (*NUMAFabric, error) { + return nil, nil + }) + + test.AssertEqual(t, time.Duration(0), cfi.refreshInterval, "") + test.AssertEqual(t, time.Time{}, cfi.lastCached, "") + if cfi.lastResults != nil { + t.Fatalf("expected nothing cached, found:\n%+v", cfi.lastResults) + } + if cfi.fetch == nil { + t.Fatalf("expected refresh function to be non-nil") + } +} + +func TestAgent_cachedFabricInfo_Key(t *testing.T) { + for name, tc := range map[string]struct { + cfi *cachedFabricInfo + }{ + "nil": {}, + "normal": { + cfi: newCachedFabricInfo(nil, nil), + }, + } { + t.Run(name, func(t *testing.T) { + // should always be the same + test.AssertEqual(t, fabricKey, tc.cfi.Key(), "") + }) + } +} + +func TestAgent_cachedFabricInfo_NeedsRefresh(t *testing.T) { + for name, tc := range map[string]struct { + nilCache bool + cacheTime time.Time + expResult bool + }{ + "nil": { + nilCache: true, + }, + "not initialized": { + expResult: true, + }, + "initialized": { + cacheTime: time.Now().Add(-time.Minute), }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - cache := newAttachInfoCache(log, tc.enabled) - - if cache == nil { - t.Fatal("expected non-nil cache") + var cfi *cachedFabricInfo + if !tc.nilCache { + cfi = newCachedFabricInfo(log, nil) + cfi.cacheItem.lastCached = tc.cacheTime } - test.AssertEqual(t, log, cache.log, "") - test.AssertEqual(t, tc.enabled, cache.isEnabled(), "isEnabled()") - test.AssertFalse(t, cache.isCached(), "default state is uncached") + test.AssertEqual(t, tc.expResult, cfi.NeedsRefresh(), "") }) } } -func TestAgent_attachInfoCache_Get(t *testing.T) { - srvResp := 
&mgmtpb.GetAttachInfoResp{ - Status: -1000, - RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ - {Rank: 1, Uri: "firsturi"}, - {Rank: 2, Uri: "nexturi"}, +func TestAgent_cachedFabricInfo_Refresh(t *testing.T) { + scan1 := map[int][]*FabricInterface{ + 2: { + {Name: "two"}, + }, + } + scan2 := map[int][]*FabricInterface{ + 1: { + {Name: "one"}, + }, + 3: { + {Name: "three"}, }, } for name, tc := range map[string]struct { - aic *attachInfoCache - cache *mgmtpb.GetAttachInfoResp - expCached bool - expRemote bool - remoteErr bool - expErr error + nilCache bool + disabled bool + fabricResult map[int][]*FabricInterface + fabricErr error + alreadyCached map[int][]*FabricInterface + expErr error + expCached map[int][]*FabricInterface }{ - "not enabled": { - aic: &attachInfoCache{}, - expRemote: true, + "nil": { + nilCache: true, + expErr: errors.New("nil"), }, - "not cached": { - aic: &attachInfoCache{enabled: atm.NewBool(true)}, - expRemote: true, - expCached: true, + "fabric scan fails": { + fabricErr: errors.New("mock fabric scan"), + expErr: errors.New("mock fabric scan"), }, - "cached": { - aic: &attachInfoCache{enabled: atm.NewBool(true)}, - cache: srvResp, - expCached: true, + "not initialized": { + fabricResult: scan1, + expCached: scan1, }, - "remote fails": { - aic: &attachInfoCache{enabled: atm.NewBool(true)}, - expRemote: true, - remoteErr: true, - expErr: errors.New("no soup for you"), + "previously cached": { + fabricResult: scan2, + alreadyCached: scan1, + expCached: scan2, }, } { t.Run(name, func(t *testing.T) { - if tc.cache != nil { - tc.aic.attachInfo = tc.cache - tc.aic.initialized.SetTrue() - } + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) - if tc.aic == nil { - return + var cfi *cachedFabricInfo + if !tc.nilCache { + cfi = newCachedFabricInfo(log, nil) + cfi.fetch = func(_ context.Context, _ ...string) (*NUMAFabric, error) { + if tc.fabricResult != nil { + return &NUMAFabric{ + numaMap: tc.fabricResult, + }, nil 
+ } + return nil, tc.fabricErr + } + if tc.alreadyCached != nil { + cfi.lastResults = &NUMAFabric{ + numaMap: tc.alreadyCached, + } + cfi.lastCached = time.Now() + } } - numaNode := 42 - sysName := "snekSezSyss" - remoteInvoked := atm.NewBool(false) - getFn := func(_ context.Context, node int, name string) (*mgmtpb.GetAttachInfoResp, error) { - test.AssertEqual(t, numaNode, node, "node was not supplied") - test.AssertEqual(t, sysName, name, "name was not supplied") + err := cfi.Refresh(test.Context(t)) - remoteInvoked.SetTrue() - if tc.remoteErr { - return nil, tc.expErr - } - return srvResp, nil + test.CmpErr(t, tc.expErr, err) + + if cfi == nil { + return } - cachedResp, gotErr := tc.aic.Get(test.Context(t), numaNode, sysName, getFn) - test.CmpErr(t, tc.expErr, gotErr) - if tc.expErr != nil { + if tc.expCached == nil { + if cfi.lastResults != nil { + t.Fatalf("expected empty cache, got %+v", cfi.lastResults) + } return } - if diff := cmp.Diff(srvResp, cachedResp, test.DefaultCmpOpts()...); diff != "" { + + if diff := cmp.Diff(tc.expCached, cfi.lastResults.numaMap, cmpopts.IgnoreUnexported(FabricInterface{})); diff != "" { t.Fatalf("-want, +got:\n%s", diff) } - - test.AssertEqual(t, tc.expCached, tc.aic.isCached(), "cache state") - test.AssertEqual(t, tc.expRemote, remoteInvoked.Load(), "remote invoked") }) } } -func TestAgent_newLocalFabricCache(t *testing.T) { +func TestAgent_NewInfoCache(t *testing.T) { for name, tc := range map[string]struct { - enabled bool + cfg *Config + expEnabled bool + expIgnoredIfaces common.StringSet + expRefreshInterval time.Duration }{ - "enabled": { - enabled: true, + "default": { + cfg: &Config{}, + expEnabled: true, + }, + "caches disabled": { + cfg: &Config{ + DisableCache: true, + }, + }, + "ignored interfaces": { + cfg: &Config{ + ExcludeFabricIfaces: common.NewStringSet("eth0", "eth1"), + }, + expEnabled: true, + expIgnoredIfaces: common.NewStringSet("eth0", "eth1"), + }, + "refresh interval": { + cfg: &Config{ + 
CacheExpiration: refreshMinutes(5 * time.Minute), + }, + expEnabled: true, + expRefreshInterval: 5 * time.Minute, + }, + "fabric interfaces": { + cfg: &Config{ + FabricInterfaces: []*NUMAFabricConfig{ + { + NUMANode: 1, + Interfaces: []*FabricInterfaceConfig{ + { + Interface: "if0", + Domain: "d0", + }, + }, + }, + }, + }, + expEnabled: true, }, - "disabled": {}, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - cache := newLocalFabricCache(log, tc.enabled) + ic := NewInfoCache(test.Context(t), log, nil, tc.cfg) - if cache == nil { - t.Fatal("expected non-nil cache") - } + test.AssertEqual(t, tc.expEnabled, ic.IsAttachInfoCacheEnabled(), "") + test.AssertEqual(t, tc.expEnabled, ic.IsFabricCacheEnabled(), "") - test.AssertEqual(t, log, cache.log, "") - test.AssertFalse(t, cache.IsCached(), "default state is uncached") - test.AssertEqual(t, tc.enabled, cache.IsEnabled(), "") + test.AssertEqual(t, tc.expIgnoredIfaces, ic.ignoreIfaces, "") + test.AssertEqual(t, tc.expRefreshInterval, ic.attachInfoRefresh, "") }) } } -func newTestFabricCache(t *testing.T, log logging.Logger, cacheMap *NUMAFabric) *localFabricCache { - t.Helper() +func TestAgent_InfoCache_EnableAttachInfoCache(t *testing.T) { + for name, tc := range map[string]struct { + ic *InfoCache + refreshInterval time.Duration + expEnabled bool + }{ + "nil": {}, + "disabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{disableAttachInfoCache: true}), + expEnabled: true, + }, + "already enabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{}), + expEnabled: true, + }, + "refresh interval": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{disableAttachInfoCache: true}), + refreshInterval: time.Minute, + expEnabled: true, + }, + } { + t.Run(name, func(t *testing.T) { + tc.ic.EnableAttachInfoCache(tc.refreshInterval) - cache := newLocalFabricCache(log, true) - if cache == nil { - t.Fatalf("nil cache") + test.AssertEqual(t, 
tc.expEnabled, tc.ic.IsAttachInfoCacheEnabled(), "") + }) } - cache.localNUMAFabric = cacheMap - cache.initialized.SetTrue() - - cache.localNUMAFabric.getAddrInterface = getMockNetInterfaceSuccess - - return cache } -func TestAgent_localFabricCache_IsEnabled(t *testing.T) { +func TestAgent_InfoCache_DisableAttachInfoCache(t *testing.T) { for name, tc := range map[string]struct { - fic *localFabricCache - expEnabled bool + ic *InfoCache }{ "nil": {}, - "not enabled": { - fic: &localFabricCache{}, + "already disabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{disableAttachInfoCache: true}), }, "enabled": { - fic: &localFabricCache{enabled: atm.NewBool(true)}, - expEnabled: true, + ic: newTestInfoCache(t, nil, testInfoCacheParams{}), }, } { t.Run(name, func(t *testing.T) { - enabled := tc.fic.IsEnabled() + tc.ic.DisableAttachInfoCache() - test.AssertEqual(t, tc.expEnabled, enabled, "IsEnabled()") + test.AssertFalse(t, tc.ic.IsAttachInfoCacheEnabled(), "") }) } } -func TestAgent_localFabricCache_CacheScan(t *testing.T) { +func TestAgent_InfoCache_EnableFabricCache(t *testing.T) { for name, tc := range map[string]struct { - lfc *localFabricCache - input *hardware.FabricInterfaceSet - expCached bool - expResult *NUMAFabric + ic *InfoCache + startEnabled bool + expEnabled bool }{ "nil": {}, "disabled": { - lfc: newLocalFabricCache(nil, false), + ic: newTestInfoCache(t, nil, testInfoCacheParams{disableFabricCache: true}), + expEnabled: true, }, - "no devices in scan": { - lfc: newLocalFabricCache(nil, true), - expCached: true, - expResult: &NUMAFabric{ - numaMap: map[int][]*FabricInterface{}, - }, - }, - "successfully cached": { - lfc: newLocalFabricCache(nil, true), - input: hardware.NewFabricInterfaceSet( - &hardware.FabricInterface{ - Providers: testFabricProviderSet("ofi+sockets"), - Name: "test0", - NetInterfaces: common.NewStringSet("os_test0"), - NUMANode: 1, - DeviceClass: hardware.Ether, - }, - &hardware.FabricInterface{ - Providers: 
testFabricProviderSet("ofi+verbs"), - Name: "test1", - NetInterfaces: common.NewStringSet("os_test1"), - NUMANode: 0, - DeviceClass: hardware.Infiniband, - }, - &hardware.FabricInterface{ - Providers: testFabricProviderSet("ofi+sockets"), - Name: "test2", - NetInterfaces: common.NewStringSet("os_test2"), - NUMANode: 0, - DeviceClass: hardware.Ether, - }, - &hardware.FabricInterface{ - Providers: testFabricProviderSet("ofi+sockets"), - Name: "lo", - NetInterfaces: common.NewStringSet("lo"), - NUMANode: 0, - DeviceClass: hardware.Loopback, + "already enabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{}), + startEnabled: true, + expEnabled: true, + }, + } { + t.Run(name, func(t *testing.T) { + test.AssertEqual(t, tc.startEnabled, tc.ic.IsFabricCacheEnabled(), "") + + tc.ic.EnableFabricCache() + + test.AssertEqual(t, tc.expEnabled, tc.ic.IsFabricCacheEnabled(), "") + }) + } +} + +func TestAgent_InfoCache_EnableStaticFabricCache(t *testing.T) { + cfg := []*NUMAFabricConfig{ + { + NUMANode: 1, + Interfaces: []*FabricInterfaceConfig{ + { + Interface: "if0", + Domain: "if0", }, - ), - expCached: true, - expResult: &NUMAFabric{ - numaMap: map[int][]*FabricInterface{ - 0: { - - { - Name: "lo", - NetDevClass: hardware.Loopback, - hw: &hardware.FabricInterface{ - Providers: testFabricProviderSet("ofi+sockets"), - Name: "lo", - NetInterfaces: common.NewStringSet("lo"), - NUMANode: 0, - DeviceClass: hardware.Loopback, - }, - }, - { - Name: "os_test1", - Domain: "test1", - NetDevClass: hardware.Infiniband, - hw: &hardware.FabricInterface{ - Providers: testFabricProviderSet("ofi+verbs"), - Name: "test1", - NetInterfaces: common.NewStringSet("os_test1"), - NUMANode: 0, - DeviceClass: hardware.Infiniband, - }, - }, - { - Name: "os_test2", - Domain: "test2", - NetDevClass: hardware.Ether, - hw: &hardware.FabricInterface{ - Providers: testFabricProviderSet("ofi+sockets"), - Name: "test2", - NetInterfaces: common.NewStringSet("os_test2"), - NUMANode: 0, - DeviceClass: 
hardware.Ether, - }, - }, - }, - 1: { - { - Name: "os_test0", - Domain: "test0", - NetDevClass: hardware.Ether, - hw: &hardware.FabricInterface{ - Providers: testFabricProviderSet("ofi+sockets"), - Name: "test0", - NetInterfaces: common.NewStringSet("os_test0"), - NUMANode: 1, - DeviceClass: hardware.Ether, - }, - }, - }, + { + Interface: "if0", + Domain: "d0", }, }, }, - "ignores passed down": { - lfc: newLocalFabricCache(nil, true).WithConfig(&Config{ - ExcludeFabricIfaces: common.NewStringSet("test1"), - }), - input: hardware.NewFabricInterfaceSet(), - expCached: true, - expResult: &NUMAFabric{ - numaMap: map[int][]*FabricInterface{}, - ignoreIfaces: common.NewStringSet("test1"), - }, + } + + for name, tc := range map[string]struct { + ic *InfoCache + startEnabled bool + expEnabled bool + }{ + "nil": {}, + "disabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{disableFabricCache: true}), + expEnabled: true, + }, + "already enabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{}), + startEnabled: true, + expEnabled: true, }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - if tc.lfc != nil { - tc.lfc.log = log - } + test.AssertEqual(t, tc.startEnabled, tc.ic.IsFabricCacheEnabled(), "") - tc.lfc.CacheScan(test.Context(t), tc.input) + nf := NUMAFabricFromConfig(log, cfg) + tc.ic.EnableStaticFabricCache(test.Context(t), nf) - test.AssertEqual(t, tc.expCached, tc.lfc.IsCached(), "IsCached()") - - if tc.lfc == nil { + test.AssertEqual(t, tc.expEnabled, tc.ic.IsFabricCacheEnabled(), "") + if tc.ic == nil { return } - if tc.expCached { - if diff := cmp.Diff(tc.expResult.numaMap, tc.lfc.localNUMAFabric.numaMap, - cmp.AllowUnexported(FabricInterface{}, hardware.FabricProviderSet{}), - ); diff != "" { - t.Fatalf("-want, +got:\n%s", diff) + if tc.expEnabled { + item, cleanup, err := tc.ic.cache.Get(test.Context(t), fabricKey) + test.CmpErr(t, nil, err) + defer cleanup() + + 
fabricCache, ok := item.(*cachedFabricInfo) + if !ok { + t.Fatalf("bad item type %T", item) } - if diff := cmp.Diff(tc.expResult.ignoreIfaces, tc.lfc.localNUMAFabric.ignoreIfaces); diff != "" { + + test.AssertEqual(t, time.Duration(0), fabricCache.refreshInterval, "expected no refresh") + if diff := cmp.Diff(nf.numaMap, fabricCache.lastResults.numaMap, cmpopts.IgnoreUnexported(FabricInterface{})); diff != "" { t.Fatalf("-want, +got:\n%s", diff) } - } else if len(tc.lfc.localNUMAFabric.numaMap) > 0 { - t.Fatalf("expected nothing cached, found: %+v", tc.lfc.localNUMAFabric.numaMap) } }) } } -func TestAgent_localFabricCache_Cache(t *testing.T) { +func TestAgent_InfoCache_DisableFabricCache(t *testing.T) { for name, tc := range map[string]struct { - lfc *localFabricCache - input *NUMAFabric - expCached bool - expIgnored common.StringSet + ic *InfoCache + startEnabled bool }{ "nil": {}, - "nil NUMAFabric": { - lfc: newLocalFabricCache(nil, true), + "already disabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{disableFabricCache: true}), + }, + "enabled": { + ic: newTestInfoCache(t, nil, testInfoCacheParams{}), + startEnabled: true, + }, + } { + t.Run(name, func(t *testing.T) { + test.AssertEqual(t, tc.startEnabled, tc.ic.IsFabricCacheEnabled(), "") + + tc.ic.DisableFabricCache() + + test.AssertFalse(t, tc.ic.IsFabricCacheEnabled(), "") + }) + } +} + +func TestAgent_InfoCache_AddProvider(t *testing.T) { + for name, tc := range map[string]struct { + ic *InfoCache + input string + expProviders common.StringSet + }{ + "nil": { + input: "something", }, - "no NUMA nodes": { - lfc: newLocalFabricCache(nil, true), - input: &NUMAFabric{ - numaMap: map[int][]*FabricInterface{}, + "empty": { + ic: &InfoCache{}, + input: "something", + expProviders: common.NewStringSet("something"), + }, + "add": { + ic: &InfoCache{ + providers: common.NewStringSet("something"), }, - expCached: true, + input: "something else", + expProviders: common.NewStringSet("something", "something 
else"), }, - "successfully cached": { - lfc: newLocalFabricCache(nil, true), - input: &NUMAFabric{ - numaMap: map[int][]*FabricInterface{ - 0: { - { - Name: "test1", - NetDevClass: hardware.Infiniband, - }, - { - Name: "test2", - NetDevClass: hardware.Ether, - }, + "ignore empty string": { + ic: &InfoCache{}, + input: "", + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if tc.ic != nil { + tc.ic.log = log + } + + tc.ic.AddProvider(tc.input) + + if tc.ic == nil { + return + } + if diff := cmp.Diff(tc.expProviders, tc.ic.providers); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + }) + } +} + +func TestAgent_InfoCache_GetAttachInfo(t *testing.T) { + ctlResp := &control.GetAttachInfoResp{ + System: "dontcare", + ServiceRanks: []*control.PrimaryServiceRank{{Rank: 1, Uri: "my uri"}}, + MSRanks: []uint32{0, 1, 2, 3}, + ClientNetHint: control.ClientNetworkHint{ + Provider: "ofi+tcp", + NetDevClass: uint32(hardware.Ether), + }, + } + + for name, tc := range map[string]struct { + getInfoCache func(logging.Logger) *InfoCache + system string + remoteResp *control.GetAttachInfoResp + remoteErr error + expErr error + expResp *control.GetAttachInfoResp + expRemote bool + expCached bool + }{ + "nil": { + expErr: errors.New("nil"), + }, + "disabled": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableAttachInfoCache: true, + }) + }, + remoteResp: ctlResp, + expResp: ctlResp, + expRemote: true, + }, + "disabled fails fetch": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableAttachInfoCache: true, + }) + }, + remoteErr: errors.New("mock remote"), + expErr: errors.New("mock remote"), + expRemote: true, + }, + "enabled but empty": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{}) + }, + remoteResp: 
ctlResp, + expResp: ctlResp, + expRemote: true, + expCached: true, + }, + "enabled but empty fails fetch": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{}) + }, + remoteErr: errors.New("mock remote"), + expErr: errors.New("mock remote"), + expRemote: true, + }, + "enabled and cached": { + getInfoCache: func(l logging.Logger) *InfoCache { + ic := newTestInfoCache(t, l, testInfoCacheParams{}) + ic.cache.Set(&cachedAttachInfo{ + fetch: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, errors.New("shouldn't call cached remote") }, - 1: { - { - Name: "test0", - NetDevClass: hardware.Ether, - }, + lastResponse: ctlResp, + cacheItem: cacheItem{lastCached: time.Now()}, + system: "test", + }) + return ic + }, + system: "test", + remoteErr: errors.New("shouldn't call remote"), + expResp: ctlResp, + expCached: true, + }, + "default system": { + getInfoCache: func(l logging.Logger) *InfoCache { + ic := newTestInfoCache(t, l, testInfoCacheParams{}) + ic.cache.Set(&cachedAttachInfo{ + fetch: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, errors.New("shouldn't call cached remote") }, - }, + lastResponse: ctlResp, + cacheItem: cacheItem{lastCached: time.Now()}, + system: build.DefaultSystemName, + }) + return ic }, + remoteErr: errors.New("shouldn't call remote"), + expResp: ctlResp, expCached: true, }, - "ignores passed down": { - lfc: newLocalFabricCache(nil, true).WithConfig(&Config{ - ExcludeFabricIfaces: common.NewStringSet("test1"), - }), - input: &NUMAFabric{ - numaMap: map[int][]*FabricInterface{ - 0: { - { - Name: "test1", - NetDevClass: hardware.Infiniband, - }, + "cache miss": { + getInfoCache: func(l logging.Logger) *InfoCache { + ic := newTestInfoCache(t, l, testInfoCacheParams{}) + ic.cache.Set(&cachedAttachInfo{ + fetch: func(_ context.Context, _ 
control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, errors.New("shouldn't call cached remote") }, - }, + lastResponse: &control.GetAttachInfoResp{}, + cacheItem: cacheItem{lastCached: time.Now()}, + system: "test", + }) + return ic }, + system: "somethingelse", + remoteResp: ctlResp, + expResp: ctlResp, expCached: true, - expIgnored: common.NewStringSet("test1"), + expRemote: true, }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - if tc.lfc != nil { - tc.lfc.log = log + var ic *InfoCache + if tc.getInfoCache != nil { + ic = tc.getInfoCache(log) + } + + calledRemote := false + if ic != nil { + ic.getAttachInfo = func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + calledRemote = true + return tc.remoteResp, tc.remoteErr + } } - if tc.input != nil { - tc.input.log = log + + if tc.system == "" { + tc.system = build.DefaultSystemName } + resp, err := ic.GetAttachInfo(test.Context(t), tc.system) - tc.lfc.Cache(test.Context(t), tc.input) + test.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expResp, resp); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } - test.AssertEqual(t, tc.expCached, tc.lfc.IsCached(), "IsCached()") + test.AssertEqual(t, tc.expRemote, calledRemote, "") - if tc.lfc == nil { + if ic == nil { return } - if tc.lfc.localNUMAFabric == nil { - t.Fatal("NUMAFabric in cache is nil") + if tc.expCached && tc.expResp != nil { + cachedItem, unlockItem, err := ic.cache.Get(test.Context(t), sysAttachInfoKey(tc.system)) + if err != nil { + t.Fatal(err) + } + defer unlockItem() + cached, ok := cachedItem.(*cachedAttachInfo) + test.AssertTrue(t, ok, "wrong type cached") + if diff := cmp.Diff(tc.expResp, cached.lastResponse); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } } + }) + } +} - if tc.expCached { - if diff := cmp.Diff(tc.input.numaMap, 
tc.lfc.localNUMAFabric.numaMap, cmp.AllowUnexported(FabricInterface{})); diff != "" { - t.Fatalf("-want, +got:\n%s", diff) +func mockGetAddrInterface(name string) (addrFI, error) { + return &mockNetInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.IP{127, 0, 0, 1}}, + }, + }, nil +} + +func TestAgent_InfoCache_GetFabricDevice(t *testing.T) { + testSet := hardware.NewFabricInterfaceSet( + &hardware.FabricInterface{ + Name: "dev0", + NetInterfaces: common.NewStringSet("test0"), + DeviceClass: hardware.Ether, + Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "testprov"}), + }, + &hardware.FabricInterface{ + Name: "dev1", + NetInterfaces: common.NewStringSet("test1"), + DeviceClass: hardware.Ether, + Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "testprov"}), + }) + + for name, tc := range map[string]struct { + getInfoCache func(logging.Logger) *InfoCache + devClass hardware.NetDevClass + provider string + iface string + fabricResp *hardware.FabricInterfaceSet + fabricErr error + expResult *FabricInterface + expErr error + expScan bool + expCachedFabric *hardware.FabricInterfaceSet + }{ + "nil": { + expErr: errors.New("nil"), + }, + "disabled": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableFabricCache: true, + }) + }, + devClass: hardware.Ether, + provider: "testprov", + fabricResp: testSet, + expScan: true, + expResult: &FabricInterface{ + Name: "test0", + Domain: "dev0", + NetDevClass: hardware.Ether, + }, + }, + "disabled fails fabric ready check": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableFabricCache: true, + mockNetIfaces: func() ([]net.Interface, error) { + return nil, errors.New("mock net ifaces") + }, + }) + }, + devClass: hardware.Ether, + provider: "testprov", + fabricErr: errors.New("shouldn't call scan"), + expErr: errors.New("mock net ifaces"), + }, + "disabled 
fails fetch": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableFabricCache: true, + }) + }, + devClass: hardware.Ether, + provider: "testprov", + fabricErr: errors.New("mock fabric scan"), + expScan: true, + expErr: errors.New("mock fabric scan"), + }, + "enabled but empty": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{}) + }, + devClass: hardware.Ether, + provider: "testprov", + fabricResp: testSet, + expScan: true, + expResult: &FabricInterface{ + Name: "test0", + Domain: "dev0", + NetDevClass: hardware.Ether, + }, + expCachedFabric: testSet, + }, + "enabled but empty fails ready wait": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + mockNetIfaces: func() ([]net.Interface, error) { + return nil, errors.New("mock net ifaces") + }, + }) + }, + devClass: hardware.Ether, + provider: "testprov", + fabricErr: errors.New("shouldn't call scan"), + expErr: errors.New("mock net ifaces"), + }, + "enabled but empty fails fetch": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{}) + }, + devClass: hardware.Ether, + provider: "testprov", + fabricErr: errors.New("mock fabric scan"), + expScan: true, + expErr: errors.New("mock fabric scan"), + }, + "enabled and cached": { + getInfoCache: func(l logging.Logger) *InfoCache { + ic := newTestInfoCache(t, l, testInfoCacheParams{}) + nf := NUMAFabricFromScan(test.Context(t), l, testSet) + nf.getAddrInterface = mockGetAddrInterface + ic.cache.Set(&cachedFabricInfo{ + fetch: func(ctx context.Context, providers ...string) (*NUMAFabric, error) { + return nil, errors.New("shouldn't call cached fetch") + }, + lastResults: nf, + cacheItem: cacheItem{lastCached: time.Now()}, + }) + return ic + }, + devClass: hardware.Ether, + provider: "testprov", + fabricErr: errors.New("shouldn't call scan"), + 
expResult: &FabricInterface{ + Name: "test0", + Domain: "dev0", + NetDevClass: hardware.Ether, + }, + expCachedFabric: testSet, + }, + "requested not found": { + getInfoCache: func(l logging.Logger) *InfoCache { + ic := newTestInfoCache(t, l, testInfoCacheParams{}) + nf := NUMAFabricFromScan(test.Context(t), l, testSet) + nf.getAddrInterface = mockGetAddrInterface + ic.cache.Set(&cachedFabricInfo{ + fetch: func(ctx context.Context, providers ...string) (*NUMAFabric, error) { + return nil, errors.New("shouldn't call cached fetch") + }, + lastResults: nf, + cacheItem: cacheItem{lastCached: time.Now()}, + }) + return ic + }, + devClass: hardware.Ether, + provider: "bad", + fabricErr: errors.New("shouldn't call scan"), + expErr: errors.New("no suitable fabric interface"), + expCachedFabric: testSet, + }, + "specific interface": { + getInfoCache: func(l logging.Logger) *InfoCache { + ic := newTestInfoCache(t, l, testInfoCacheParams{}) + nf := NUMAFabricFromScan(test.Context(t), l, testSet) + nf.getAddrInterface = mockGetAddrInterface + ic.cache.Set(&cachedFabricInfo{ + fetch: func(ctx context.Context, providers ...string) (*NUMAFabric, error) { + return nil, errors.New("shouldn't call cached fetch") + }, + lastResults: nf, + cacheItem: cacheItem{lastCached: time.Now()}, + }) + return ic + }, + devClass: hardware.Ether, + provider: "testprov", + iface: "test1", + fabricErr: errors.New("shouldn't call scan"), + expResult: &FabricInterface{ + Name: "test1", + Domain: "dev1", + NetDevClass: hardware.Ether, + }, + expCachedFabric: testSet, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + var ic *InfoCache + if tc.getInfoCache != nil { + ic = tc.getInfoCache(log) + } + + calledScan := false + if ic != nil { + ic.fabricScan = func(_ context.Context, _ ...string) (*NUMAFabric, error) { + calledScan = true + if tc.fabricResp != nil { + nf := NUMAFabricFromScan(test.Context(t), log, 
tc.fabricResp) + nf.getAddrInterface = mockGetAddrInterface + return nf, nil + } + return nil, tc.fabricErr } - if diff := cmp.Diff(tc.expIgnored, tc.lfc.localNUMAFabric.ignoreIfaces); diff != "" { - t.Fatalf("-want, +got:\n%s", diff) + } + + result, err := ic.GetFabricDevice(test.Context(t), &FabricIfaceParams{ + DevClass: tc.devClass, + Provider: tc.provider, + Interface: tc.iface, + }) + + test.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expResult, result, cmpopts.IgnoreUnexported(FabricInterface{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + + test.AssertEqual(t, tc.expScan, calledScan, "") + + if ic == nil { + return + } + + if tc.expCachedFabric != nil { + data, unlock, err := ic.cache.Get(test.Context(t), fabricKey) + if err != nil { + t.Fatal(err) + } + defer unlock() + + cached, ok := data.(*cachedFabricInfo) + test.AssertTrue(t, ok, "bad cached data type") + + expNF := NUMAFabricFromScan(test.Context(t), log, tc.expCachedFabric) + if diff := cmp.Diff(expNF.numaMap, cached.lastResults.numaMap, cmpopts.IgnoreUnexported(FabricInterface{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) } - } else if len(tc.lfc.localNUMAFabric.numaMap) > 0 { - t.Fatalf("expected nothing cached, got: %+v", tc.lfc.localNUMAFabric.numaMap) } }) } } -func TestAgent_localFabricCache_GetDevice(t *testing.T) { - populatedCache := &NUMAFabric{ - numaMap: map[int][]*FabricInterface{ - 0: { - fabricInterfacesFromHardware(&hardware.FabricInterface{ - NetInterfaces: common.NewStringSet("test1"), - DeviceClass: hardware.Infiniband, - Name: "test1_alias", - Providers: testFabricProviderSet("ofi+verbs"), - })[0], - fabricInterfacesFromHardware(&hardware.FabricInterface{ - NetInterfaces: common.NewStringSet("test2"), - DeviceClass: hardware.Ether, - Name: "test2_alias", - Providers: testFabricProviderSet("ofi+sockets"), - })[0], - }, - 1: { - fabricInterfacesFromHardware(&hardware.FabricInterface{ - NetInterfaces: common.NewStringSet("test3"), - DeviceClass: 
hardware.Infiniband, - Name: "test3_alias", - Providers: testFabricProviderSet("ofi+verbs"), - })[0], - fabricInterfacesFromHardware(&hardware.FabricInterface{ - NetInterfaces: common.NewStringSet("test4"), - DeviceClass: hardware.Infiniband, - Name: "test4_alias", - Providers: testFabricProviderSet("ofi+verbs"), - })[0], - fabricInterfacesFromHardware(&hardware.FabricInterface{ - NetInterfaces: common.NewStringSet("test5"), - DeviceClass: hardware.Ether, - Name: "test5_alias", - Providers: testFabricProviderSet("ofi+sockets"), - })[0], - }, - 2: { - fabricInterfacesFromHardware(&hardware.FabricInterface{ - NetInterfaces: common.NewStringSet("test6"), - DeviceClass: hardware.Ether, - Providers: testFabricProviderSet("ofi+sockets"), - })[0], - fabricInterfacesFromHardware(&hardware.FabricInterface{ - NetInterfaces: common.NewStringSet("test7"), - DeviceClass: hardware.Ether, - Name: "test7_alias", - Providers: testFabricProviderSet("ofi+sockets", "ofi+verbs"), - })[0], - }, +func TestAgent_InfoCache_Refresh(t *testing.T) { + ctlResp := &control.GetAttachInfoResp{ + System: "dontcare", + ServiceRanks: []*control.PrimaryServiceRank{{Rank: 1, Uri: "my uri"}}, + MSRanks: []uint32{0, 1, 2, 3}, + ClientNetHint: control.ClientNetworkHint{ + Provider: "ofi+tcp", + NetDevClass: uint32(hardware.Ether), }, } + testSet := hardware.NewFabricInterfaceSet(&hardware.FabricInterface{ + Name: "dev0", + NetInterfaces: common.NewStringSet("test0"), + DeviceClass: hardware.Ether, + Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "testprov"}), + }) + + testSys := "test_sys" + for name, tc := range map[string]struct { - lfc *localFabricCache - params *FabricIfaceParams - expDevice *FabricInterface - expErr error + getInfoCache func(logging.Logger) *InfoCache + expErr error + expCachedFabric *hardware.FabricInterfaceSet + expCachedAttachInfo *control.GetAttachInfoResp }{ - "nil cache": { - expErr: NotCachedErr, - }, - "nil params": { - lfc: newTestFabricCache(t, 
nil, populatedCache), + "nil": { expErr: errors.New("nil"), }, - "nothing cached": { - lfc: &localFabricCache{}, - params: &FabricIfaceParams{}, - expErr: NotCachedErr, + "both disabled": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableFabricCache: true, + disableAttachInfoCache: true, + }) + }, + expErr: errors.New("disabled"), }, - "request verbs": { - lfc: newTestFabricCache(t, nil, populatedCache), - params: &FabricIfaceParams{ - NUMANode: 2, - Provider: "ofi+verbs", - DevClass: hardware.Ether, + "both enabled, cache items not created": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{}) }, - expDevice: &FabricInterface{ - Name: "test7", - NetDevClass: hardware.Ether, - Domain: "test7_alias", + }, + "both enabled, cache items exist": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + cachedItems: []cache.Item{ + newCachedAttachInfo(0, testSys, nil, + func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return ctlResp, nil + }), + newCachedFabricInfo(l, + func(_ context.Context, _ ...string) (*NUMAFabric, error) { + return NUMAFabricFromScan(test.Context(t), l, testSet), nil + }), + }, + }) }, + expCachedFabric: testSet, + expCachedAttachInfo: ctlResp, }, - "request sockets": { - lfc: newTestFabricCache(t, nil, populatedCache), - params: &FabricIfaceParams{ - NUMANode: 0, - Provider: "ofi+sockets", - DevClass: hardware.Ether, + "fabric disabled": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableFabricCache: true, + cachedItems: []cache.Item{ + newCachedAttachInfo(0, testSys, nil, + func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return ctlResp, nil + }), + newCachedFabricInfo(l, + 
func(_ context.Context, _ ...string) (*NUMAFabric, error) { + return nil, errors.New("shouldn't call fabric") + }), + }, + }) }, - expDevice: &FabricInterface{ - Name: "test2", - NetDevClass: hardware.Ether, - Domain: "test2_alias", + expCachedAttachInfo: ctlResp, + }, + "attach info disabled": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableAttachInfoCache: true, + cachedItems: []cache.Item{ + newCachedAttachInfo(0, testSys, nil, + func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, errors.New("shouldn't call GetAttachInfo") + }), + newCachedFabricInfo(l, + func(_ context.Context, _ ...string) (*NUMAFabric, error) { + return NUMAFabricFromScan(test.Context(t), l, testSet), nil + }), + }, + }) }, + expCachedFabric: testSet, }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - if tc.lfc != nil { - tc.lfc.log = log - if tc.lfc.localNUMAFabric != nil { - tc.lfc.localNUMAFabric.log = log + var ic *InfoCache + if tc.getInfoCache != nil { + ic = tc.getInfoCache(log) + } + + err := ic.Refresh(test.Context(t)) + + test.CmpErr(t, tc.expErr, err) + + if tc.expCachedFabric != nil { + data, unlock, err := ic.cache.Get(test.Context(t), fabricKey) + if err != nil { + t.Fatal(err) } + defer unlock() + + cached, ok := data.(*cachedFabricInfo) + test.AssertTrue(t, ok, "bad cached data type") + + expNF := NUMAFabricFromScan(test.Context(t), log, tc.expCachedFabric) + if diff := cmp.Diff(expNF.numaMap, cached.lastResults.numaMap, cmpopts.IgnoreUnexported(FabricInterface{})); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + } + + if tc.expCachedAttachInfo != nil { + data, unlock, err := ic.cache.Get(test.Context(t), sysAttachInfoKey(testSys)) + if err != nil { + t.Fatal(err) + } + defer unlock() + + cached, ok := data.(*cachedAttachInfo) + test.AssertTrue(t, ok, 
"bad cached data type") + + if diff := cmp.Diff(tc.expCachedAttachInfo, cached.lastResponse); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) + } + } + }) + } +} + +func TestAgent_InfoCache_waitFabricReady(t *testing.T) { + defaultNetIfaceFn := func() ([]net.Interface, error) { + return []net.Interface{ + {Name: "t0"}, + {Name: "t1"}, + {Name: "t2"}, + }, nil + } + + defaultDevClassProv := &hardware.MockNetDevClassProvider{ + GetNetDevClassReturn: []hardware.MockGetNetDevClassResult{ + { + ExpInput: "t0", + NDC: hardware.Infiniband, + }, + { + ExpInput: "t1", + NDC: hardware.Infiniband, + }, + { + ExpInput: "t2", + NDC: hardware.Ether, + }, + }, + } + + for name, tc := range map[string]struct { + netIfacesFn func() ([]net.Interface, error) + devClassProv *hardware.MockNetDevClassProvider + devStateProv *hardware.MockNetDevStateProvider + netDevClass hardware.NetDevClass + expErr error + expChecked []string + }{ + "netIfaces fails": { + netIfacesFn: func() ([]net.Interface, error) { + return nil, errors.New("mock netIfaces") + }, + netDevClass: hardware.Infiniband, + expErr: errors.New("mock netIfaces"), + }, + "GetNetDevClass fails": { + devClassProv: &hardware.MockNetDevClassProvider{ + GetNetDevClassReturn: []hardware.MockGetNetDevClassResult{ + { + ExpInput: "t0", + Err: errors.New("mock GetNetDevClass"), + }, + }, + }, + netDevClass: hardware.Infiniband, + expErr: errors.New("mock GetNetDevClass"), + }, + "GetNetDevState fails": { + devStateProv: &hardware.MockNetDevStateProvider{ + GetStateReturn: []hardware.MockNetDevStateResult{ + {Err: errors.New("mock NetDevStateProvider")}, + }, + }, + netDevClass: hardware.Infiniband, + expErr: errors.New("mock NetDevStateProvider"), + expChecked: []string{"t0"}, + }, + "down devices are ignored": { + devStateProv: &hardware.MockNetDevStateProvider{ + GetStateReturn: []hardware.MockNetDevStateResult{ + {State: hardware.NetDevStateDown}, + {State: hardware.NetDevStateReady}, + }, + }, + netDevClass: hardware.Infiniband, 
+ expChecked: []string{"t0", "t1"}, + }, + "success": { + netDevClass: hardware.Infiniband, + expChecked: []string{"t0", "t1"}, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if tc.netIfacesFn == nil { + tc.netIfacesFn = defaultNetIfaceFn + } + + if tc.devClassProv == nil { + tc.devClassProv = defaultDevClassProv + } + + if tc.devStateProv == nil { + tc.devStateProv = &hardware.MockNetDevStateProvider{} + } + + ic := &InfoCache{ + log: log, + netIfaces: tc.netIfacesFn, + devClassGetter: tc.devClassProv, + devStateGetter: tc.devStateProv, } - dev, err := tc.lfc.GetDevice(tc.params) + err := ic.waitFabricReady(test.Context(t), tc.netDevClass) test.CmpErr(t, tc.expErr, err) - if diff := cmp.Diff(tc.expDevice, dev, cmpopts.IgnoreUnexported(FabricInterface{})); diff != "" { + if diff := cmp.Diff(tc.expChecked, tc.devStateProv.GetStateCalled); diff != "" { t.Fatalf("-want, +got:\n%s", diff) } }) diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index 17426339ef9..cc83832a27b 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -10,7 +10,6 @@ import ( "net" "strings" "sync" - "time" "github.com/pkg/errors" "golang.org/x/net/context" @@ -35,21 +34,17 @@ import ( // to MS. 
type mgmtModule struct { attachInfoMutex sync.RWMutex + fabricMutex sync.RWMutex log logging.Logger sys string ctlInvoker control.Invoker - attachInfo *attachInfoCache - fabricInfo *localFabricCache + cache *InfoCache monitor *procMon useDefaultNUMA bool - numaGetter hardware.ProcessNUMAProvider - providerIdx uint - devClassGetter hardware.NetDevClassProvider - devStateGetter hardware.NetDevStateProvider - fabricScanner *hardware.FabricScanner - netIfaces func() ([]net.Interface, error) + numaGetter hardware.ProcessNUMAProvider + providerIdx uint } func (mod *mgmtModule) HandleCall(ctx context.Context, session *drpc.Session, method drpc.Method, req []byte) ([]byte, error) { @@ -180,9 +175,9 @@ func (mod *mgmtModule) getNUMANode(ctx context.Context, pid int32) (uint, error) } func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, req *mgmtpb.GetAttachInfoReq) (*mgmtpb.GetAttachInfoResp, error) { - rawResp, err := mod.getAttachInfoResp(ctx, numaNode, req.Sys) + rawResp, err := mod.getAttachInfoResp(ctx, req.Sys) if err != nil { - mod.log.Errorf("failed to fetch remote AttachInfo: %s", err.Error()) + mod.log.Errorf("failed to fetch AttachInfo: %s", err.Error()) return nil, err } @@ -223,8 +218,17 @@ func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, req *mgm return resp, nil } -func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) { - return mod.attachInfo.Get(ctx, numaNode, sys, mod.getAttachInfoRemote) +func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, sys string) (*mgmtpb.GetAttachInfoResp, error) { + ctlResp, err := mod.cache.GetAttachInfo(ctx, sys) + if err != nil { + return nil, err + } + + resp := new(mgmtpb.GetAttachInfoResp) + if err := convert.Types(ctlResp, resp); err != nil { + return nil, err + } + return resp, nil } func (mod *mgmtModule) selectAttachInfo(ctx context.Context, srvResp *mgmtpb.GetAttachInfoResp, iface, domain string) 
(*mgmtpb.GetAttachInfoResp, error) { @@ -328,88 +332,8 @@ func (mod *mgmtModule) getProviderIdxURIs(srvResp *mgmtpb.GetAttachInfoResp, idx return uris, nil } -func (mod *mgmtModule) getAttachInfoRemote(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) { - // Ask the MS for _all_ info, regardless of pbReq.AllRanks, so that the - // cache can serve future "pbReq.AllRanks == true" requests. - req := new(control.GetAttachInfoReq) - req.SetSystem(sys) - req.AllRanks = true - resp, err := control.GetAttachInfo(ctx, mod.ctlInvoker, req) - if err != nil { - return nil, errors.Wrapf(err, "GetAttachInfo %+v", req) - } - - if resp.ClientNetHint.Provider == "" { - return nil, errors.New("GetAttachInfo response contained no provider") - } - - pbResp := new(mgmtpb.GetAttachInfoResp) - if err := convert.Types(resp, pbResp); err != nil { - return nil, errors.Wrap(err, "Failed to convert GetAttachInfo response") - } - - return pbResp, nil -} - func (mod *mgmtModule) getFabricInterface(ctx context.Context, params *FabricIfaceParams) (*FabricInterface, error) { - mod.attachInfoMutex.Lock() - defer mod.attachInfoMutex.Unlock() - - if mod.fabricInfo.IsCached() { - return mod.getCachedInterface(ctx, params) - } - - if err := mod.waitFabricReady(ctx, params.DevClass); err != nil { - return nil, err - } - - result, err := mod.fabricScanner.Scan(ctx, params.Provider) - if err != nil { - return nil, err - } - - mod.fabricInfo.CacheScan(ctx, result) - - return mod.getCachedInterface(ctx, params) -} - -func (mod *mgmtModule) getCachedInterface(ctx context.Context, params *FabricIfaceParams) (*FabricInterface, error) { - if params.Interface != "" { - fi, err := mod.fabricInfo.localNUMAFabric.FindDevice(params) - if err != nil { - return nil, err - } - return fi[0], nil - } - return mod.fabricInfo.GetDevice(params) -} - -func (mod *mgmtModule) waitFabricReady(ctx context.Context, netDevClass hardware.NetDevClass) error { - if mod.netIfaces == nil { - 
mod.netIfaces = net.Interfaces - } - ifaces, err := mod.netIfaces() - if err != nil { - return err - } - - var needIfaces []string - for _, iface := range ifaces { - devClass, err := mod.devClassGetter.GetNetDevClass(iface.Name) - if err != nil { - return err - } - if devClass == netDevClass { - needIfaces = append(needIfaces, iface.Name) - } - } - - return hardware.WaitFabricReady(ctx, mod.log, hardware.WaitFabricReadyParams{ - StateProvider: mod.devStateGetter, - FabricIfaces: needIfaces, - IgnoreUnusable: true, - IterationSleep: time.Second, - }) + return mod.cache.GetFabricDevice(ctx, params) } func (mod *mgmtModule) handleNotifyPoolConnect(ctx context.Context, reqb []byte, pid int32) error { @@ -437,3 +361,9 @@ func (mod *mgmtModule) handleNotifyPoolDisconnect(ctx context.Context, reqb []by func (mod *mgmtModule) handleNotifyExit(ctx context.Context, pid int32) { mod.monitor.NotifyExit(ctx, pid) } + +// RefreshCache triggers a refresh of all data that is currently cached. If nothing has been cached +// yet, it does nothing. 
+func (mod *mgmtModule) RefreshCache(ctx context.Context) error { + return mod.cache.Refresh(ctx) +} diff --git a/src/control/cmd/daos_agent/mgmt_rpc_test.go b/src/control/cmd/daos_agent/mgmt_rpc_test.go index fc4c0fe789a..0746f7fbd06 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc_test.go +++ b/src/control/cmd/daos_agent/mgmt_rpc_test.go @@ -9,7 +9,6 @@ package main import ( "context" "net" - "os" "sync" "testing" @@ -18,7 +17,9 @@ import ( "github.com/pkg/errors" "google.golang.org/protobuf/proto" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/common/proto/convert" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/fault" @@ -40,575 +41,197 @@ func hostResps(resps ...*mgmtpb.GetAttachInfoResp) []*control.HostResponse { } func TestAgent_mgmtModule_getAttachInfo(t *testing.T) { - testSrvResp := func() *mgmtpb.GetAttachInfoResp { - return &mgmtpb.GetAttachInfoResp{ - RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ - { - Rank: 0, - Uri: "uri0", - }, - { - Rank: 1, - Uri: "uri1", - }, - { - Rank: 3, - Uri: "uri3", - }, - }, - SecondaryRankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ - { - Rank: 0, - Uri: "uri4-sec", - ProviderIdx: 2, - }, - { - Rank: 1, - Uri: "uri5-sec", - ProviderIdx: 2, - }, - { - Rank: 3, - Uri: "uri6-sec", - ProviderIdx: 2, - }, - { - Rank: 0, - Uri: "uri0-sec", - ProviderIdx: 1, - }, - { - Rank: 1, - Uri: "uri1-sec", - ProviderIdx: 1, - }, - { - Rank: 3, - Uri: "uri3-sec", - ProviderIdx: 1, - }, - }, - MsRanks: []uint32{0, 1, 3}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "ofi+verbs", - NetDevClass: uint32(hardware.Infiniband), - }, - SecondaryClientNetHints: []*mgmtpb.ClientNetHint{ - { - Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Infiniband), - ProviderIdx: 1, - }, - { - Provider: "badidx", - NetDevClass: uint32(hardware.Ether), - 
ProviderIdx: 0, // bad for secondary - }, - }, - } + testSys := "test_sys" + testResp := &control.GetAttachInfoResp{ + System: "dontcare", + ServiceRanks: []*control.PrimaryServiceRank{{Rank: 1, Uri: "my uri"}}, + MSRanks: []uint32{0, 1, 2, 3}, + ClientNetHint: control.ClientNetworkHint{ + Provider: "ofi+tcp", + NetDevClass: uint32(hardware.Ether), + }, } - priResp := func(fi, domain string) *mgmtpb.GetAttachInfoResp { - withHint := testSrvResp() - withHint.ClientNetHint.Interface = fi - withHint.ClientNetHint.Domain = domain + testFIS := hardware.NewFabricInterfaceSet( + &hardware.FabricInterface{ + Name: "test0", + NetInterfaces: common.NewStringSet("test0"), + DeviceClass: hardware.Ether, + Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "ofi+tcp"}), + }, + &hardware.FabricInterface{ + Name: "dev1", + NetInterfaces: common.NewStringSet("test1"), + DeviceClass: hardware.Ether, + NUMANode: 1, + Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "ofi+tcp"}), + }) + + testFabric := NUMAFabricFromScan(test.Context(t), logging.NewCommandLineLogger(), testFIS) + testFabric.getAddrInterface = mockGetAddrInterface - return withHint + reqBytes := func(req *mgmtpb.GetAttachInfoReq) []byte { + t.Helper() + bytes, err := proto.Marshal(req) + if err != nil { + t.Fatal(err) + } + return bytes } - secResp := func(fi, domain string) *mgmtpb.GetAttachInfoResp { - return &mgmtpb.GetAttachInfoResp{ - RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ - { - Rank: 0, - Uri: "uri0-sec", - ProviderIdx: 1, - }, - { - Rank: 1, - Uri: "uri1-sec", - ProviderIdx: 1, - }, - { - Rank: 3, - Uri: "uri3-sec", - ProviderIdx: 1, - }, - }, - MsRanks: []uint32{0, 1, 3}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Infiniband), - Interface: fi, - Domain: domain, - ProviderIdx: 1, - }, + respWith := func(in *control.GetAttachInfoResp, iface, domain string) *mgmtpb.GetAttachInfoResp { + t.Helper() + out := 
new(mgmtpb.GetAttachInfoResp) + if err := convert.Types(in, out); err != nil { + t.Fatal(err) } + out.ClientNetHint.Interface = iface + out.ClientNetHint.Domain = domain + return out } for name, tc := range map[string]struct { - reqIface string - reqDomain string - providerIdx uint - numaNode int - rpcResp *control.HostResponse - expResp *mgmtpb.GetAttachInfoResp - expErr error + sysName string + mockGetAttachInfo getAttachInfoFn + mockFabricScan fabricScanFn + mockGetNetIfaces func() ([]net.Interface, error) + numaGetter *mockNUMAProvider + reqBytes []byte + expResp *mgmtpb.GetAttachInfoResp + expErr error }{ - "RPC error": { - rpcResp: &control.HostResponse{ - Error: errors.New("mock RPC"), - }, - expErr: errors.New("mock RPC"), - }, - "no provider hint": { - rpcResp: &control.HostResponse{ - Message: &mgmtpb.GetAttachInfoResp{ - RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ - { - Rank: 0, - Uri: "uri0", - }, - }, - MsRanks: []uint32{0}, - ClientNetHint: &mgmtpb.ClientNetHint{ - NetDevClass: uint32(hardware.Infiniband), - }, - }, - }, - expErr: errors.New("no provider"), + "junk req": { + reqBytes: []byte("garbage"), + expErr: errors.New("unmarshal"), }, - "no provider match": { - rpcResp: &control.HostResponse{ - Message: &mgmtpb.GetAttachInfoResp{ - RankUris: []*mgmtpb.GetAttachInfoResp_RankUri{ - { - Rank: 0, - Uri: "uri0", - }, - }, - MsRanks: []uint32{0}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "notreal", - NetDevClass: uint32(hardware.Infiniband), - }, - }, - }, - expErr: errors.New("no suitable fabric interface"), - }, - "primary provider": { - rpcResp: &control.HostResponse{ - Message: testSrvResp(), - }, - expResp: priResp("fi0", "d0"), + "non-matching system name": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{Sys: "bad"}), + expResp: &mgmtpb.GetAttachInfoResp{Status: int32(daos.InvalidInput)}, }, - "secondary provider": { - providerIdx: 1, - rpcResp: &control.HostResponse{ - Message: testSrvResp(), - }, - expResp: secResp("fi0", "fi0"), 
+ "get NUMA fails": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{Sys: testSys}), + numaGetter: &mockNUMAProvider{GetNUMANodeIDForPIDErr: errors.New("mock get NUMA")}, + expErr: errors.New("mock get NUMA"), }, - "client req iface and domain": { - reqIface: "fi1", - reqDomain: "d1", - rpcResp: &control.HostResponse{ - Message: testSrvResp(), + "getAttachInfo fails": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{Sys: testSys}), + mockGetAttachInfo: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, errors.New("mock GetAttachInfo") }, - expResp: priResp("fi1", "d1"), + expErr: errors.New("mock GetAttachInfo"), }, - "client req secondary provider": { - reqIface: "fi1", - reqDomain: "fi1", - providerIdx: 1, - rpcResp: &control.HostResponse{ - Message: testSrvResp(), + "waitFabricReady fails": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{Sys: testSys}), + mockGetNetIfaces: func() ([]net.Interface, error) { + return nil, errors.New("mock get net ifaces") }, - expResp: secResp("fi1", "fi1"), + expErr: errors.New("mock get net ifaces"), }, - "client req iface for secondary provider": { - reqIface: "fi1", - reqDomain: "fi1", - rpcResp: &control.HostResponse{ - Message: testSrvResp(), + "scan fabric fails": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{Sys: testSys}), + mockFabricScan: func(_ context.Context, _ ...string) (*NUMAFabric, error) { + return nil, errors.New("mock fabric scan") }, - expResp: secResp("fi1", "fi1"), + expErr: errors.New("mock fabric scan"), }, - "client req iface only": { - reqIface: "fi1", - rpcResp: &control.HostResponse{ - Message: testSrvResp(), - }, - expResp: secResp("fi1", "fi1"), - }, - "client req domain-only ignored": { - reqDomain: "d2", - rpcResp: &control.HostResponse{ - Message: testSrvResp(), - }, - expResp: priResp("fi0", "d0"), - }, - "client req provider mismatch ignored": { - reqIface: "fi1", - reqDomain: "d1", - providerIdx: 1, - rpcResp: 
&control.HostResponse{ - Message: testSrvResp(), - }, - expResp: secResp("fi1", "d1"), - }, - "client req iface/domain mismatch ignored": { - reqIface: "fi0", - reqDomain: "d2", - rpcResp: &control.HostResponse{ - Message: testSrvResp(), - }, - expResp: priResp("fi0", "d2"), + "success": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{Sys: testSys}), + expResp: respWith(testResp, "test1", "dev1"), }, - "client req iface not found ignored": { - reqIface: "notreal", - rpcResp: &control.HostResponse{ - Message: testSrvResp(), - }, - expResp: priResp("notreal", "notreal"), + "no sys succeeds": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{}), + expResp: respWith(testResp, "test1", "dev1"), }, - "client req iface idx malformed": { - reqIface: "bad1", - rpcResp: &control.HostResponse{ - Message: testSrvResp(), + "incompatible error": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{}), + mockGetAttachInfo: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, &fault.Fault{Code: code.ServerWrongSystem} }, - expErr: errors.New("not a secondary provider"), + expResp: &mgmtpb.GetAttachInfoResp{Status: int32(daos.ControlIncompatible)}, }, - "config provider idx out of range": { - providerIdx: 5, - rpcResp: &control.HostResponse{ - Message: testSrvResp(), + "bad cert error": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{}), + mockGetAttachInfo: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, &fault.Fault{Code: code.SecurityInvalidCert} }, - expErr: errors.New("out of range"), + expResp: &mgmtpb.GetAttachInfoResp{Status: int32(daos.BadCert)}, }, - "malformed hint at sec provider idx": { - providerIdx: 2, - rpcResp: &control.HostResponse{ - Message: testSrvResp(), + "MS connection error": { + reqBytes: reqBytes(&mgmtpb.GetAttachInfoReq{}), + mockGetAttachInfo: func(_ context.Context, _ control.UnaryInvoker, _ 
*control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, errors.Errorf("unable to contact the %s", build.ManagementServiceName) }, - expErr: errors.New("provider index"), + expResp: &mgmtpb.GetAttachInfoResp{Status: int32(daos.Unreachable)}, }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - testFabric := &NUMAFabric{ - log: log, - numaMap: map[int][]*FabricInterface{ - 0: { - { - Name: "fi0", - Domain: "d0", - NetDevClass: hardware.Infiniband, - hw: &hardware.FabricInterface{ - Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "ofi+verbs"}), - }, - }, - { - Name: "fi0", - NetDevClass: hardware.Infiniband, - hw: &hardware.FabricInterface{ - Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "ofi+tcp"}), - }, - }, - }, - 1: { - { - Name: "fi1", - Domain: "d1", - NetDevClass: hardware.Infiniband, - hw: &hardware.FabricInterface{ - Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "ofi+verbs"}), - }, - }, - { - Name: "fi1", - NetDevClass: hardware.Infiniband, - hw: &hardware.FabricInterface{ - Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "ofi+tcp"}), - }, - }, - { - Name: "bad1", - NetDevClass: hardware.Ether, - hw: &hardware.FabricInterface{ - Providers: hardware.NewFabricProviderSet(&hardware.FabricProvider{Name: "badidx"}), - }, - }, - }, - }, - } - - sysName := "dontcare" - mod := &mgmtModule{ - log: log, - sys: sysName, - fabricInfo: newTestFabricCache(t, log, testFabric), - attachInfo: newAttachInfoCache(log, true), - ctlInvoker: control.NewMockInvoker(log, &control.MockInvokerConfig{ - Sys: sysName, - UnaryResponse: &control.UnaryResponse{ - Responses: []*control.HostResponse{tc.rpcResp}, - }, - }), - providerIdx: tc.providerIdx, + if tc.numaGetter == nil { + tc.numaGetter = &mockNUMAProvider{ + GetNUMANodeIDForPIDResult: 1, + } } - resp, err := 
mod.getAttachInfo(context.Background(), tc.numaNode, - &mgmtpb.GetAttachInfoReq{ - Sys: sysName, - Interface: tc.reqIface, - Domain: tc.reqDomain, - }) - - test.CmpErr(t, tc.expErr, err) - if diff := cmp.Diff(tc.expResp, resp, cmpopts.IgnoreUnexported( - mgmtpb.GetAttachInfoResp{}, - mgmtpb.GetAttachInfoResp_RankUri{}, - mgmtpb.ClientNetHint{}, - )); diff != "" { - t.Fatalf("-want, +got:\n%s", diff) + if tc.mockGetAttachInfo == nil { + tc.mockGetAttachInfo = func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return testResp, nil + } } - }) - } -} - -func TestAgent_mgmtModule_getAttachInfo_cacheResp(t *testing.T) { - testResps := []*mgmtpb.GetAttachInfoResp{ - { - MsRanks: []uint32{0, 1, 3}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Ether), - }, - }, - { - MsRanks: []uint32{0}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Ether), - }, - }, - { - MsRanks: []uint32{2, 3}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Ether), - }, - }, - } - - hostResps := func(resps []*mgmtpb.GetAttachInfoResp) []*control.HostResponse { - result := []*control.HostResponse{} - - for _, r := range resps { - result = append(result, &control.HostResponse{ - Message: r, - }) - } - - return result - } - testFI := fabricInterfacesFromHardware(&hardware.FabricInterface{ - Name: "test0", - NetInterfaces: common.NewStringSet("test0"), - DeviceClass: hardware.Ether, - Providers: testFabricProviderSet("ofi+tcp"), - }) - - hintResp := func(resp *mgmtpb.GetAttachInfoResp) *mgmtpb.GetAttachInfoResp { - withHint := new(mgmtpb.GetAttachInfoResp) - withHint = proto.Clone(resp).(*mgmtpb.GetAttachInfoResp) - withHint.ClientNetHint.Interface = testFI[0].Name - withHint.ClientNetHint.Domain = testFI[0].Name - - return withHint - } - - unaryResps := func(hostResps []*control.HostResponse) 
[]*control.UnaryResponse { - ur := make([]*control.UnaryResponse, 0, len(hostResps)) - for _, hr := range hostResps { - ur = append(ur, &control.UnaryResponse{ - Responses: []*control.HostResponse{hr}, - }) - } - return ur - } - - type attachInfoResult struct { - resp *mgmtpb.GetAttachInfoResp - err error - } - - for name, tc := range map[string]struct { - cacheDisabled bool - rpcResps []*control.HostResponse - expResult []attachInfoResult - }{ - "error": { - rpcResps: []*control.HostResponse{ - { - Error: errors.New("host response"), - }, - }, - expResult: []attachInfoResult{ - { - err: errors.New("host response"), - }, - }, - }, - "incompatible fault": { - rpcResps: []*control.HostResponse{ - { - Error: &fault.Fault{ - Code: code.ServerWrongSystem, - }, - }, - }, - expResult: []attachInfoResult{ - { - resp: &mgmtpb.GetAttachInfoResp{ - Status: int32(daos.ControlIncompatible), - }, - }, - }, - }, - "certificate fault": { - rpcResps: []*control.HostResponse{ - { - Error: &fault.Fault{ - Code: code.SecurityInvalidCert, - }, - }, - }, - expResult: []attachInfoResult{ - { - resp: &mgmtpb.GetAttachInfoResp{ - Status: int32(daos.BadCert), - }, - }, - }, - }, - "connection fault": { - rpcResps: []*control.HostResponse{ - { - Error: &fault.Fault{ - Code: code.ClientConnectionRefused, - }, - }, - }, - expResult: []attachInfoResult{ - { - resp: &mgmtpb.GetAttachInfoResp{ - Status: int32(daos.Unreachable), - }, - }, - }, - }, - "cache disabled": { - cacheDisabled: true, - rpcResps: hostResps(testResps), - expResult: []attachInfoResult{ - { - resp: hintResp(testResps[0]), - }, - { - resp: hintResp(testResps[1]), - }, - { - resp: hintResp(testResps[2]), - }, - }, - }, - "cache": { - rpcResps: hostResps(testResps), - expResult: []attachInfoResult{ - { - resp: hintResp(testResps[0]), - }, - { - resp: hintResp(testResps[0]), - }, - { - resp: hintResp(testResps[0]), - }, - }, - }, - } { - t.Run(name, func(t *testing.T) { - log, buf := logging.NewTestLogger(t.Name()) - defer 
test.ShowBufferOnFailure(t, buf) - - sysName := "dontcare" - mockInvokerCfg := &control.MockInvokerConfig{ - Sys: sysName, - UnaryResponseSet: []*control.UnaryResponse{}, + if tc.mockFabricScan == nil { + tc.mockFabricScan = func(_ context.Context, _ ...string) (*NUMAFabric, error) { + return testFabric, nil + } } - for _, rpcResp := range tc.rpcResps { - mockInvokerCfg.UnaryResponseSet = append(mockInvokerCfg.UnaryResponseSet, - &control.UnaryResponse{ - Responses: []*control.HostResponse{rpcResp}, - }, - ) + if tc.mockGetNetIfaces == nil { + tc.mockGetNetIfaces = func() ([]net.Interface, error) { + ifaces := []net.Interface{} + for _, dev := range testFIS.NetDevices() { + ifaces = append(ifaces, net.Interface{Name: dev}) + } + return ifaces, nil + } } mod := &mgmtModule{ log: log, - sys: sysName, - fabricInfo: newTestFabricCache(t, log, &NUMAFabric{ - log: log, - numaMap: map[int][]*FabricInterface{ - 0: testFI, - }, - }), - attachInfo: newAttachInfoCache(log, !tc.cacheDisabled), - ctlInvoker: control.NewMockInvoker(log, &control.MockInvokerConfig{ - Sys: sysName, - UnaryResponseSet: unaryResps(tc.rpcResps), + sys: testSys, + cache: newTestInfoCache(t, log, testInfoCacheParams{ + mockGetAttachInfo: tc.mockGetAttachInfo, + mockScanFabric: tc.mockFabricScan, + mockNetIfaces: tc.mockGetNetIfaces, }), - numaGetter: &mockNUMAProvider{}, + numaGetter: tc.numaGetter, } - reqBytes, err := proto.Marshal(&mgmtpb.GetAttachInfoReq{ - Sys: sysName, - }) - if err != nil { - t.Fatal(err) - } + respBytes, err := mod.handleGetAttachInfo(test.Context(t), tc.reqBytes, 123) - for i, exp := range tc.expResult { - t.Logf("iteration %d\n", i) - respBytes, err := mod.handleGetAttachInfo(test.Context(t), reqBytes, int32(os.Getpid())) - - test.CmpErr(t, exp.err, err) + test.CmpErr(t, tc.expErr, err) - var resp mgmtpb.GetAttachInfoResp - if err := proto.Unmarshal(respBytes, &resp); err != nil { - t.Fatal(err) - } - - if exp.resp == nil { - if respBytes == nil { - return - } - 
t.Fatalf("expected nil response, got:\n%+v\n", &resp) + if tc.expResp == nil { + if respBytes == nil { + return } + t.Fatalf("expected nil response, got bytes: %+v", respBytes) + } - if diff := cmp.Diff(exp.resp, &resp, cmpopts.IgnoreUnexported(mgmtpb.GetAttachInfoResp{}, mgmtpb.ClientNetHint{})); diff != "" { - t.Fatalf("-want, +got:\n%s", diff) - } + resp := new(mgmtpb.GetAttachInfoResp) + err = proto.Unmarshal(respBytes, resp) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(tc.expResp, resp, cmpopts.IgnoreUnexported( + mgmtpb.GetAttachInfoResp{}, + mgmtpb.GetAttachInfoResp_RankUri{}, + mgmtpb.ClientNetHint{}, + )); diff != "" { + t.Fatalf("want-, got+:\n%s", diff) } }) } - } func TestAgent_mgmtModule_getAttachInfo_Parallel(t *testing.T) { @@ -616,38 +239,43 @@ func TestAgent_mgmtModule_getAttachInfo_Parallel(t *testing.T) { defer test.ShowBufferOnFailure(t, buf) sysName := "dontcare" - - mod := &mgmtModule{ - log: log, - sys: sysName, - fabricInfo: newTestFabricCache(t, log, &NUMAFabric{ - log: log, - numaMap: map[int][]*FabricInterface{ - 0: fabricInterfacesFromHardware(&hardware.FabricInterface{ - Name: "test0", - NetInterfaces: common.NewStringSet("test0"), - DeviceClass: hardware.Ether, - Providers: testFabricProviderSet("ofi+tcp"), - }), - }, - }), - attachInfo: newAttachInfoCache(log, true), - ctlInvoker: control.NewMockInvoker(log, &control.MockInvokerConfig{ - Sys: sysName, - UnaryResponse: &control.UnaryResponse{ - Responses: []*control.HostResponse{ - { - Message: &mgmtpb.GetAttachInfoResp{ - MsRanks: []uint32{0, 1, 3}, - ClientNetHint: &mgmtpb.ClientNetHint{ - Provider: "ofi+tcp", - NetDevClass: uint32(hardware.Ether), - }, + ctlInvoker := control.NewMockInvoker(log, &control.MockInvokerConfig{ + Sys: sysName, + UnaryResponse: &control.UnaryResponse{ + Responses: []*control.HostResponse{ + { + Message: &mgmtpb.GetAttachInfoResp{ + MsRanks: []uint32{0, 1, 3}, + ClientNetHint: &mgmtpb.ClientNetHint{ + Provider: "ofi+tcp", + NetDevClass: 
uint32(hardware.Ether), }, }, }, }, - }), + }, + }) + ic := newTestInfoCache(t, log, testInfoCacheParams{ + ctlInvoker: ctlInvoker, + mockScanFabric: func(_ context.Context, _ ...string) (*NUMAFabric, error) { + fis := hardware.NewFabricInterfaceSet(&hardware.FabricInterface{ + Name: "test0", + NetInterfaces: common.NewStringSet("test0"), + Providers: testFabricProviderSet("ofi+tcp"), + DeviceClass: hardware.Ether, + }) + nf := NUMAFabricFromScan(test.Context(t), log, fis) + nf.getAddrInterface = mockGetAddrInterface + return nf, nil + }, + mockGetAttachInfo: control.GetAttachInfo, + }) + + mod := &mgmtModule{ + log: log, + sys: sysName, + cache: ic, + ctlInvoker: ctlInvoker, } var wg sync.WaitGroup @@ -727,113 +355,49 @@ func TestAgent_mgmtModule_getNUMANode(t *testing.T) { } } -func TestAgent_mgmtModule_waitFabricReady(t *testing.T) { - defaultNetIfaceFn := func() ([]net.Interface, error) { - return []net.Interface{ - {Name: "t0"}, - {Name: "t1"}, - {Name: "t2"}, - }, nil - } - - defaultDevClassProv := &hardware.MockNetDevClassProvider{ - GetNetDevClassReturn: []hardware.MockGetNetDevClassResult{ - { - ExpInput: "t0", - NDC: hardware.Infiniband, - }, - { - ExpInput: "t1", - NDC: hardware.Infiniband, - }, - { - ExpInput: "t2", - NDC: hardware.Ether, - }, - }, - } - +func TestAgent_mgmtModule_RefreshCache(t *testing.T) { for name, tc := range map[string]struct { - netIfacesFn func() ([]net.Interface, error) - devClassProv *hardware.MockNetDevClassProvider - devStateProv *hardware.MockNetDevStateProvider - netDevClass hardware.NetDevClass + getInfoCache func(logging.Logger) *InfoCache expErr error - expChecked []string }{ - "netIfaces fails": { - netIfacesFn: func() ([]net.Interface, error) { - return nil, errors.New("mock netIfaces") + "nil cache": { + getInfoCache: func(_ logging.Logger) *InfoCache { return nil }, + expErr: errors.New("nil"), + }, + "caches disabled": { + getInfoCache: func(log logging.Logger) *InfoCache { + return newTestInfoCache(t, log, 
testInfoCacheParams{ + disableFabricCache: true, + disableAttachInfoCache: true, + }) }, - netDevClass: hardware.Infiniband, - expErr: errors.New("mock netIfaces"), + expErr: errors.New("disabled"), }, - "GetNetDevClass fails": { - devClassProv: &hardware.MockNetDevClassProvider{ - GetNetDevClassReturn: []hardware.MockGetNetDevClassResult{ - { - ExpInput: "t0", - Err: errors.New("mock GetNetDevClass"), + "nothing cached": { + getInfoCache: func(log logging.Logger) *InfoCache { + return newTestInfoCache(t, log, testInfoCacheParams{ + mockGetAttachInfo: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + return nil, errors.New("shouldn't call getAttachInfo") }, - }, - }, - netDevClass: hardware.Infiniband, - expErr: errors.New("mock GetNetDevClass"), - }, - "GetNetDevState fails": { - devStateProv: &hardware.MockNetDevStateProvider{ - GetStateReturn: []hardware.MockNetDevStateResult{ - {Err: errors.New("mock NetDevStateProvider")}, - }, - }, - netDevClass: hardware.Infiniband, - expErr: errors.New("mock NetDevStateProvider"), - expChecked: []string{"t0"}, - }, - "down devices are ignored": { - devStateProv: &hardware.MockNetDevStateProvider{ - GetStateReturn: []hardware.MockNetDevStateResult{ - {State: hardware.NetDevStateDown}, - {State: hardware.NetDevStateReady}, - }, + mockScanFabric: func(_ context.Context, _ ...string) (*NUMAFabric, error) { + return nil, errors.New("shouldn't call fabric scan") + }, + }) }, - netDevClass: hardware.Infiniband, - expChecked: []string{"t0", "t1"}, - }, - "success": { - netDevClass: hardware.Infiniband, - expChecked: []string{"t0", "t1"}, }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - if tc.netIfacesFn == nil { - tc.netIfacesFn = defaultNetIfaceFn - } - - if tc.devClassProv == nil { - tc.devClassProv = defaultDevClassProv - } - - if tc.devStateProv == nil { - tc.devStateProv = 
&hardware.MockNetDevStateProvider{} - } - mod := &mgmtModule{ - log: log, - netIfaces: tc.netIfacesFn, - devClassGetter: tc.devClassProv, - devStateGetter: tc.devStateProv, + log: log, + cache: tc.getInfoCache(log), } - err := mod.waitFabricReady(test.Context(t), tc.netDevClass) + err := mod.RefreshCache(test.Context(t)) test.CmpErr(t, tc.expErr, err) - if diff := cmp.Diff(tc.expChecked, tc.devStateProv.GetStateCalled); diff != "" { - t.Fatalf("-want, +got:\n%s", diff) - } }) } } diff --git a/src/control/cmd/daos_agent/start.go b/src/control/cmd/daos_agent/start.go index a1b50a1d85a..7665e7faed3 100644 --- a/src/control/cmd/daos_agent/start.go +++ b/src/control/cmd/daos_agent/start.go @@ -72,16 +72,6 @@ func (cmd *startCmd) Execute(_ []string) error { } cmd.Debugf("created dRPC server: %s", time.Since(createDrpcStart)) - aicEnabled := !cmd.attachInfoCacheDisabled() - if !aicEnabled { - cmd.Debug("GetAttachInfo agent caching has been disabled") - } - - ficEnabled := !cmd.fabricCacheDisabled() - if !ficEnabled { - cmd.Debug("Local fabric interface caching has been disabled") - } - hwprovInitStart := time.Now() hwprovFini, err := hwprov.Init(cmd.Logger) if err != nil { @@ -90,36 +80,36 @@ func (cmd *startCmd) Execute(_ []string) error { defer hwprovFini() cmd.Debugf("initialized hardware providers: %s", time.Since(hwprovInitStart)) + cacheStart := time.Now() + cache := NewInfoCache(ctx, cmd.Logger, cmd.ctlInvoker, cmd.cfg) + if cmd.attachInfoCacheDisabled() { + cache.DisableAttachInfoCache() + cmd.Debug("GetAttachInfo agent caching has been disabled") + } + + if cmd.fabricCacheDisabled() { + cache.DisableFabricCache() + cmd.Debug("Local fabric interface caching has been disabled") + } + cmd.Debugf("created cache: %s", time.Since(cacheStart)) + procmonStart := time.Now() procmon := NewProcMon(cmd.Logger, cmd.ctlInvoker, cmd.cfg.SystemName) procmon.startMonitoring(ctx) cmd.Debugf("started process monitor: %s", time.Since(procmonStart)) - fabricCacheStart := time.Now() - 
fabricCache := newLocalFabricCache(cmd.Logger, ficEnabled).WithConfig(cmd.cfg) - if len(cmd.cfg.FabricInterfaces) > 0 { - // Cache is required to use user-defined fabric interfaces - fabricCache.enabled.SetTrue() - nf := NUMAFabricFromConfig(cmd.Logger, cmd.cfg.FabricInterfaces) - fabricCache.Cache(ctx, nf) - } - cmd.Debugf("created fabric cache: %s", time.Since(fabricCacheStart)) - drpcRegStart := time.Now() drpcServer.RegisterRPCModule(NewSecurityModule(cmd.Logger, cmd.cfg.TransportConfig)) - drpcServer.RegisterRPCModule(&mgmtModule{ - log: cmd.Logger, - sys: cmd.cfg.SystemName, - ctlInvoker: cmd.ctlInvoker, - attachInfo: newAttachInfoCache(cmd.Logger, aicEnabled), - fabricInfo: fabricCache, - numaGetter: hwprov.DefaultProcessNUMAProvider(cmd.Logger), - fabricScanner: hwprov.DefaultFabricScanner(cmd.Logger), - devClassGetter: hwprov.DefaultNetDevClassProvider(cmd.Logger), - devStateGetter: hwprov.DefaultNetDevStateProvider(cmd.Logger), - monitor: procmon, - providerIdx: cmd.cfg.ProviderIdx, - }) + mgmtMod := &mgmtModule{ + log: cmd.Logger, + sys: cmd.cfg.SystemName, + ctlInvoker: cmd.ctlInvoker, + cache: cache, + numaGetter: hwprov.DefaultProcessNUMAProvider(cmd.Logger), + monitor: procmon, + providerIdx: cmd.cfg.ProviderIdx, + } + drpcServer.RegisterRPCModule(mgmtMod) cmd.Debugf("registered dRPC modules: %s", time.Since(drpcRegStart)) hwlocStart := time.Now() @@ -150,7 +140,7 @@ func (cmd *startCmd) Execute(_ []string) error { signals := make(chan os.Signal) finish := make(chan struct{}) - signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE, syscall.SIGUSR1) + signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGPIPE, syscall.SIGUSR1, syscall.SIGUSR2) // Anonymous goroutine to wait on the signals channel and tell the // program to finish when it receives a signal. 
Since we notify on // SIGINT and SIGTERM we should only catch these on a kill or ctrl+c @@ -166,6 +156,9 @@ func (cmd *startCmd) Execute(_ []string) error { case syscall.SIGUSR1: cmd.Infof("Signal received. Caught %s; flushing open pool handles", sig) procmon.FlushAllHandles(ctx) + case syscall.SIGUSR2: + cmd.Infof("Signal received. Caught %s; refreshing caches", sig) + mgmtMod.RefreshCache(ctx) default: shutdownRcvd = time.Now() cmd.Infof("Signal received. Caught %s; shutting down", sig) diff --git a/src/control/lib/cache/cache.go b/src/control/lib/cache/cache.go new file mode 100644 index 00000000000..4ffd98e3fb7 --- /dev/null +++ b/src/control/lib/cache/cache.go @@ -0,0 +1,220 @@ +// +// (C) Copyright 2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package cache + +import ( + "context" + "fmt" + "sort" + "sync" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/logging" +) + +type Item interface { + Lock() + Unlock() + Key() string + Refresh(ctx context.Context) error + NeedsRefresh() bool +} + +// ItemCache is a mechanism for caching Items to keys. +type ItemCache struct { + log logging.Logger + mutex sync.RWMutex + items map[string]Item +} + +// NewItemCache creates a new ItemCache. +func NewItemCache(log logging.Logger) *ItemCache { + c := &ItemCache{ + log: log, + items: make(map[string]Item), + } + return c +} + +// Set caches an item under a given key. +func (ic *ItemCache) Set(item Item) error { + if ic == nil { + return errors.New("ItemCache is nil") + } + + if common.InterfaceIsNil(item) || item.Key() == "" { + return errors.New("invalid item") + } + + ic.mutex.Lock() + defer ic.mutex.Unlock() + + ic.set(item) + return nil +} + +func (ic *ItemCache) set(item Item) { + ic.items[item.Key()] = item +} + +// Delete fully deletes an Item from the cache. 
+// Calling Delete on a nil cache, or with a key that is not cached, is a no-op.
+func (ic *ItemCache) Delete(key string) {
+	if ic != nil {
+		ic.mutex.Lock()
+		delete(ic.items, key)
+		ic.mutex.Unlock()
+	}
+}
+
+// Has reports whether an item is currently cached under the given key.
+// A nil cache never has any items.
+func (ic *ItemCache) Has(key string) bool {
+	if ic == nil {
+		return false
+	}
+
+	ic.mutex.RLock()
+	_, cached := ic.items[key]
+	ic.mutex.RUnlock()
+
+	return cached
+}
+
+// Keys returns the keys of all cached items in ascending order. A nil cache
+// yields a nil slice; an empty cache yields an empty, non-nil slice.
+//
+// NOTE: Keys does not take the cache mutex. Refresh calls it while already
+// holding the write lock, and sync.RWMutex is not reentrant, so locking here
+// would self-deadlock. Callers that may run concurrently with writers need
+// their own synchronization.
+func (ic *ItemCache) Keys() []string {
+	if ic == nil {
+		return nil
+	}
+
+	sorted := make([]string, 0, len(ic.items))
+	for name := range ic.items {
+		sorted = append(sorted, name)
+	}
+	sort.Strings(sorted)
+
+	return sorted
+}
+
+// ItemCreateFunc builds the Item to be cached when a lookup misses.
+type ItemCreateFunc func() (Item, error)
+
+// errKeyNotFound is the error returned when no item is cached under a key.
+type errKeyNotFound struct {
+	key string
+}
+
+// Error implements the error interface.
+func (e *errKeyNotFound) Error() string {
+	return fmt.Sprintf("key %q not found", e.key)
+}
+
+// noopRelease is the release function handed back on error paths, so that
+// callers can always invoke the returned function without a nil check.
+func noopRelease() {}
+
+// GetOrCreate returns an item from the cache if it exists, otherwise it creates
+// the item using the given function and caches it. The item must be released
+// by the caller when it is safe to be modified.
+// The returned release function is never nil, even when an error is returned.
+// If missFn reports success but yields a nil item, an error is returned and
+// nothing is cached.
+func (ic *ItemCache) GetOrCreate(ctx context.Context, key string, missFn ItemCreateFunc) (Item, func(), error) {
+	if ic == nil {
+		return nil, noopRelease, errors.New("nil ItemCache")
+	}
+
+	if key == "" {
+		return nil, noopRelease, errors.Errorf("empty string is an invalid key")
+	}
+
+	if missFn == nil {
+		return nil, noopRelease, errors.Errorf("item create function is required")
+	}
+
+	ic.mutex.Lock()
+	defer ic.mutex.Unlock()
+
+	item, err := ic.get(key)
+	if err != nil {
+		ic.log.Debugf("failed to get item for key %q: %s", key, err.Error())
+		item, err = missFn()
+		if err != nil {
+			return nil, noopRelease, errors.Wrapf(err, "create item for %q", key)
+		}
+		// Mirror the validation done by Set: caching a nil item would panic
+		// on item.Key() while the cache mutex is held.
+		if common.InterfaceIsNil(item) {
+			return nil, noopRelease, errors.Errorf("create function returned a nil item for %q", key)
+		}
+		ic.log.Debugf("created item for key %q", key)
+		ic.set(item)
+	}
+
+	return ic.refreshAndLock(ctx, key, item)
+}
+
+// Get returns an item from the cache if it exists, otherwise it returns an
+// error. The item must be released by the caller when it is safe to be modified.
+// The returned release function is never nil, even when an error is returned.
+func (ic *ItemCache) Get(ctx context.Context, key string) (Item, func(), error) {
+	if ic == nil {
+		return nil, noopRelease, errors.New("nil ItemCache")
+	}
+
+	if key == "" {
+		return nil, noopRelease, errors.Errorf("empty string is an invalid key")
+	}
+
+	ic.mutex.Lock()
+	defer ic.mutex.Unlock()
+
+	item, err := ic.get(key)
+	if err != nil {
+		return nil, noopRelease, err
+	}
+
+	return ic.refreshAndLock(ctx, key, item)
+}
+
+// refreshAndLock refreshes the item if it reports that it needs it, then locks
+// the item and returns it together with its Unlock method as the release
+// function. Both Get and GetOrCreate call it with the cache mutex held.
+func (ic *ItemCache) refreshAndLock(ctx context.Context, key string, item Item) (Item, func(), error) {
+	if item.NeedsRefresh() {
+		if err := item.Refresh(ctx); err != nil {
+			return nil, noopRelease, errors.Wrapf(err, "fetch data for %q", key)
+		}
+		ic.log.Debugf("refreshed item %q", key)
+	}
+	item.Lock()
+
+	return item, item.Unlock, nil
+}
+
+// get looks up the item cached under key without taking the cache mutex; it
+// returns an *errKeyNotFound if nothing is cached there.
+func (ic *ItemCache) get(key string) (Item, error) {
+	val, ok := ic.items[key]
+	if ok {
+		return val, nil
+	}
+	return nil, &errKeyNotFound{key: key}
+}
+
+// Refresh forces a re-fetch of the items cached under the given keys, or of all
+// items in the cache when no keys are given.
+func (ic *ItemCache) Refresh(ctx context.Context, keys ...string) error { + if ic == nil { + return errors.New("nil ItemCache") + } + + ic.mutex.Lock() + defer ic.mutex.Unlock() + + if len(keys) == 0 { + keys = ic.Keys() + } + + for _, key := range keys { + item, err := ic.get(key) + if err != nil { + return err + } + + if err := item.Refresh(ctx); err != nil { + return errors.Wrapf(err, "failed to refresh cached item %q", item.Key()) + } + } + return nil +} diff --git a/src/control/lib/cache/cache_test.go b/src/control/lib/cache/cache_test.go new file mode 100644 index 00000000000..6d1aeb465ea --- /dev/null +++ b/src/control/lib/cache/cache_test.go @@ -0,0 +1,563 @@ +// +// (C) Copyright 2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package cache + +import ( + "context" + "testing" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/logging" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" +) + +func TestCache_NewItemCache(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + ic := NewItemCache(log) + + if ic == nil { + t.Fatal("failed to create ItemCache") + } + + if ic.items == nil { + t.Fatal("didn't set up item map") + } + + if ic.log != log { + t.Fatal("didn't preserve logger") + } +} + +type mockItem struct { + ItemKey string + ID string + RefreshErr error + NeedsRefreshResult bool +} + +func (m *mockItem) Lock() {} + +func (m *mockItem) Unlock() {} + +func (m *mockItem) Key() string { + return m.ItemKey +} + +func (m *mockItem) Refresh(ctx context.Context) error { + return m.RefreshErr +} + +func (m *mockItem) NeedsRefresh() bool { + return m.NeedsRefreshResult +} + +func testMockItem(id ...string) *mockItem { + mock := &mockItem{ItemKey: "mock"} + if len(id) > 0 { + mock.ID = id[0] + } + return mock +} + +func TestCache_ItemCache_Set(t *testing.T) { + for name, tc := 
range map[string]struct { + nilCache bool + alreadyCached map[string]Item + val *mockItem + expErr error + expCached bool + }{ + "nil cache": { + nilCache: true, + val: testMockItem(), + expErr: errors.New("nil"), + }, + "nil item": { + expErr: errors.New("invalid item"), + }, + "empty key": { + val: &mockItem{}, + expErr: errors.New("invalid item"), + }, + "cached": { + val: testMockItem(), + expCached: true, + }, + "overwrite": { + alreadyCached: map[string]Item{ + "mock": testMockItem("old"), + }, + val: testMockItem("new"), + expCached: true, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + var ic *ItemCache + if !tc.nilCache { + ic = NewItemCache(log) + } + err := ic.Set(tc.val) + + test.CmpErr(t, tc.expErr, err) + + if ic == nil || tc.val == nil { + return + } + + item, ok := ic.items[tc.val.ItemKey] + if tc.expCached { + if !ok { + t.Fatalf("expected %q to be cached", tc.val.ItemKey) + } + + if diff := cmp.Diff(tc.val, item); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + } else { + if !ok { + return + } + + if diff := cmp.Diff(tc.val, item); diff == "" { + t.Fatalf("value was not supposed to be cached") + } + } + }) + } +} + +func TestCache_ItemCache_GetOrCreate(t *testing.T) { + defaultCreate := func() (Item, error) { + return testMockItem("default"), nil + } + + for name, tc := range map[string]struct { + nilCache bool + key string + createFunc ItemCreateFunc + alreadyCached map[string]Item + expResult Item + expErr error + }{ + "nil": { + nilCache: true, + key: "mock", + createFunc: defaultCreate, + expErr: errors.New("nil"), + }, + "empty key": { + key: "", + createFunc: defaultCreate, + expErr: errors.New("invalid key"), + }, + "nil create func": { + key: "mock", + expErr: errors.New("create function is required"), + }, + "cached": { + key: "mock", + createFunc: func() (Item, error) { + return nil, errors.New("shouldn't call create") + }, + alreadyCached: 
map[string]Item{ + "mock": testMockItem("cached"), + }, + expResult: testMockItem("cached"), + }, + "create func failed": { + key: "mock", + createFunc: func() (Item, error) { + return nil, errors.New("mock create") + }, + expErr: errors.New("mock create"), + }, + "created": { + key: "mock", + createFunc: defaultCreate, + expResult: testMockItem("default"), + }, + "refresh failed": { + key: "mock", + createFunc: func() (Item, error) { + mi := testMockItem("default") + mi.NeedsRefreshResult = true + mi.RefreshErr = errors.New("mock item refresh") + return mi, nil + }, + expErr: errors.New("mock item refresh"), + }, + "refresh success": { + key: "mock", + createFunc: func() (Item, error) { + mi := testMockItem("default") + mi.NeedsRefreshResult = true + return mi, nil + }, + expResult: &mockItem{ + ItemKey: "mock", + ID: "default", + NeedsRefreshResult: true, + }, + }, + "no refresh needed": { + key: "mock", + createFunc: func() (Item, error) { + return &mockItem{ + ItemKey: "mock", + ID: "default", + RefreshErr: errors.New("should not call refresh"), + }, nil + }, + expResult: testMockItem("default"), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + var ic *ItemCache + if !tc.nilCache { + ic = NewItemCache(log) + if tc.alreadyCached != nil { + ic.items = tc.alreadyCached + } + } + + result, cleanup, err := ic.GetOrCreate(test.Context(t), tc.key, tc.createFunc) + + if cleanup == nil { + t.Fatal("expected non-nil cleanup function") + } + defer cleanup() + + test.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expResult, result, cmpopts.IgnoreFields(mockItem{}, "RefreshErr")); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + }) + } +} + +func TestCache_ItemCache_Get(t *testing.T) { + for name, tc := range map[string]struct { + nilCache bool + key string + alreadyCached map[string]Item + expResult Item + expErr error + }{ + "nil": { + nilCache: true, + key: "mock", + 
expErr: errors.New("nil"), + }, + "empty key": { + key: "", + expErr: errors.New("invalid key"), + }, + "missing": { + key: "mock", + expErr: &errKeyNotFound{key: "mock"}, + }, + "refresh failed": { + key: "mock", + alreadyCached: map[string]Item{ + "mock": &mockItem{ + ItemKey: "mock", + NeedsRefreshResult: true, + RefreshErr: errors.New("mock item refresh"), + }, + }, + expErr: errors.New("mock item refresh"), + }, + "refresh success": { + key: "mock", + alreadyCached: map[string]Item{ + "mock": &mockItem{ + ItemKey: "mock", + NeedsRefreshResult: true, + }, + }, + expResult: &mockItem{ + ItemKey: "mock", + NeedsRefreshResult: true, + }, + }, + "no refresh needed": { + key: "mock", + alreadyCached: map[string]Item{ + "mock": &mockItem{ + ItemKey: "mock", + RefreshErr: errors.New("should not call refresh"), + }, + }, + expResult: testMockItem(), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + var ic *ItemCache + if !tc.nilCache { + ic = NewItemCache(log) + if tc.alreadyCached != nil { + ic.items = tc.alreadyCached + } + } + + result, cleanup, err := ic.Get(test.Context(t), tc.key) + + if cleanup == nil { + t.Fatal("expected non-nil cleanup function") + } + defer cleanup() + + test.CmpErr(t, tc.expErr, err) + if diff := cmp.Diff(tc.expResult, result, cmpopts.IgnoreFields(mockItem{}, "RefreshErr")); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + }) + } +} + +func TestCache_ItemCache_Refresh(t *testing.T) { + for name, tc := range map[string]struct { + nilCache bool + keys []string + cache map[string]Item + expErr error + }{ + "nil": { + nilCache: true, + expErr: errors.New("nil"), + }, + "no items": {}, + "refresh fails": { + cache: map[string]Item{ + "mock": &mockItem{ + ItemKey: "mock", + RefreshErr: errors.New("mock refresh"), + }, + }, + expErr: errors.New("mock refresh"), + }, + "success": { + cache: map[string]Item{ + "mock": testMockItem(), + }, + }, + "specific 
key": { + keys: []string{"one"}, + cache: map[string]Item{ + "one": &mockItem{ + ItemKey: "one", + }, + "two": &mockItem{ + ItemKey: "two", + RefreshErr: errors.New("shouldn't call two"), + }, + "three": &mockItem{ + ItemKey: "three", + RefreshErr: errors.New("shouldn't call three"), + }, + }, + }, + "multiple keys": { + keys: []string{"one"}, + cache: map[string]Item{ + "one": &mockItem{ + ItemKey: "one", + }, + "two": &mockItem{ + ItemKey: "two", + RefreshErr: errors.New("shouldn't call two"), + }, + "three": &mockItem{ + ItemKey: "three", + }, + }, + }, + "invalid key": { + keys: []string{"fake"}, + cache: map[string]Item{ + "one": &mockItem{ + ItemKey: "one", + RefreshErr: errors.New("shouldn't call one"), + }, + "two": &mockItem{ + ItemKey: "two", + RefreshErr: errors.New("shouldn't call two"), + }, + "three": &mockItem{ + ItemKey: "three", + RefreshErr: errors.New("shouldn't call three"), + }, + }, + expErr: &errKeyNotFound{"fake"}, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + var ic *ItemCache + if !tc.nilCache { + ic = NewItemCache(log) + if tc.cache != nil { + ic.items = tc.cache + } + } + + err := ic.Refresh(test.Context(t), tc.keys...) 
+ + test.CmpErr(t, tc.expErr, err) + }) + } +} + +func TestCache_ItemCache_Has(t *testing.T) { + for name, tc := range map[string]struct { + ic *ItemCache + key string + expResult bool + }{ + "nil": { + key: "something", + }, + "empty": { + ic: &ItemCache{}, + key: "mock", + }, + "success": { + ic: &ItemCache{ + items: map[string]Item{ + "mock": testMockItem(), + }, + }, + key: "mock", + expResult: true, + }, + } { + t.Run(name, func(t *testing.T) { + test.AssertEqual(t, tc.expResult, tc.ic.Has(tc.key), "") + }) + } +} + +func TestCache_ItemCache_Keys(t *testing.T) { + for name, tc := range map[string]struct { + nilCache bool + cached map[string]Item + expKeys []string + }{ + "nil": { + nilCache: true, + }, + "empty": { + expKeys: []string{}, + }, + "one": { + cached: map[string]Item{ + "something": &mockItem{}, + }, + expKeys: []string{"something"}, + }, + "multi": { + cached: map[string]Item{ + "one": &mockItem{}, + "ring": &mockItem{}, + "to": &mockItem{}, + "rule": &mockItem{}, + "them": &mockItem{}, + "all": &mockItem{}, + }, + expKeys: []string{"all", "one", "ring", "rule", "them", "to"}, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + var ic *ItemCache + if !tc.nilCache { + ic = NewItemCache(log) + if tc.cached != nil { + ic.items = tc.cached + } + } + + keys := ic.Keys() + + if diff := cmp.Diff(tc.expKeys, keys); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + }) + } +} + +func TestCache_ItemCache_Delete(t *testing.T) { + for name, tc := range map[string]struct { + ic *ItemCache + key string + expCache map[string]Item + }{ + "nil": { + key: "dontcare", + }, + "empty": { + ic: &ItemCache{}, + key: "dontcare", + }, + "key not found": { + ic: &ItemCache{ + items: map[string]Item{ + "one": testMockItem("1"), + "two": testMockItem("2"), + "three": testMockItem("3"), + }, + }, + key: "four", + expCache: map[string]Item{ + "one": testMockItem("1"), + "two": 
testMockItem("2"), + "three": testMockItem("3"), + }, + }, + "success": { + ic: &ItemCache{ + items: map[string]Item{ + "one": testMockItem("1"), + "two": testMockItem("2"), + "three": testMockItem("3"), + }, + }, + key: "two", + expCache: map[string]Item{ + "one": testMockItem("1"), + "three": testMockItem("3"), + }, + }, + } { + t.Run(name, func(t *testing.T) { + tc.ic.Delete(tc.key) + + if tc.ic != nil { + if diff := cmp.Diff(tc.expCache, tc.ic.items); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + } + }) + } +} diff --git a/src/control/lib/hardware/fabric.go b/src/control/lib/hardware/fabric.go index e3d06b08669..412073fc6e6 100644 --- a/src/control/lib/hardware/fabric.go +++ b/src/control/lib/hardware/fabric.go @@ -1065,7 +1065,7 @@ func WaitFabricReady(ctx context.Context, log logging.Logger, params WaitFabricR func loopFabricReady(log logging.Logger, params WaitFabricReadyParams, ch chan error) { readySet := common.NewStringSet() unusableSet := common.NewStringSet() - log.Debug("waiting for fabric interfaces to become ready...") + log.Trace("waiting for fabric interfaces to become ready...") for { for _, iface := range params.FabricIfaces { // No need to check again if we marked it ready or unusable diff --git a/src/control/lib/hardware/fabric_test.go b/src/control/lib/hardware/fabric_test.go index 1250e1d3ff7..8512063b053 100644 --- a/src/control/lib/hardware/fabric_test.go +++ b/src/control/lib/hardware/fabric_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2022 Intel Corporation. +// (C) Copyright 2021-2023 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -1344,6 +1344,7 @@ func TestHardware_NewFabricScanner(t *testing.T) { ), test.CmpOptIgnoreFieldAnyType("log"), cmpopts.IgnoreFields(FabricScanner{}, "mutex"), + cmpopts.IgnoreFields(MockNetDevClassProvider{}, "mutex"), ); diff != "" { t.Fatalf("(-want, +got)\n%s\n", diff) } @@ -1444,9 +1445,9 @@ func TestHardware_FabricScanner_Scan(t *testing.T) { "already initialized": { config: GetMockFabricScannerConfig(), builders: []FabricInterfaceSetBuilder{ - &mockFabricInterfaceSetBuilder{}, - &mockFabricInterfaceSetBuilder{}, - &mockFabricInterfaceSetBuilder{}, + &MockFabricInterfaceSetBuilder{}, + &MockFabricInterfaceSetBuilder{}, + &MockFabricInterfaceSetBuilder{}, }, expErr: errors.New("no fabric interfaces found"), }, @@ -1542,9 +1543,9 @@ func TestHardware_FabricScanner_Scan(t *testing.T) { }, providers: []string{"ofi+tcp"}, builders: []FabricInterfaceSetBuilder{ - &mockFabricInterfaceSetBuilder{}, - &mockFabricInterfaceSetBuilder{}, - &mockFabricInterfaceSetBuilder{}, + &MockFabricInterfaceSetBuilder{}, + &MockFabricInterfaceSetBuilder{}, + &MockFabricInterfaceSetBuilder{}, }, expBuildersChanged: true, expResult: NewFabricInterfaceSet( @@ -1706,11 +1707,11 @@ func TestHardware_FabricScanner_Scan(t *testing.T) { if !tc.expBuildersChanged { for _, b := range tc.builders { - mock, ok := b.(*mockFabricInterfaceSetBuilder) + mock, ok := b.(*MockFabricInterfaceSetBuilder) if !ok { t.Fatalf("bad test setup: test builders aren't mocks") } - test.AssertEqual(t, 1, mock.buildPartCalled, "") + test.AssertEqual(t, 1, mock.BuildPartCalled, "") } } }) @@ -1804,6 +1805,7 @@ func TestHardware_defaultFabricInterfaceSetBuilders(t *testing.T) { cmp.AllowUnexported(MockFabricInterfaceProvider{}), cmp.AllowUnexported(MockNetDevClassProvider{}), test.CmpOptIgnoreFieldAnyType("log"), + cmpopts.IgnoreFields(MockNetDevClassProvider{}, "mutex"), ); diff != "" { t.Fatalf("(-want, +got)\n%s\n", diff) } diff --git 
a/src/control/lib/hardware/mocks.go b/src/control/lib/hardware/mocks.go index fe7c1b29f5c..33ce193440d 100644 --- a/src/control/lib/hardware/mocks.go +++ b/src/control/lib/hardware/mocks.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2022 Intel Corporation. +// (C) Copyright 2021-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -9,8 +9,10 @@ package hardware import ( "context" "fmt" + "sync" "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/logging" "github.com/pkg/errors" ) @@ -103,7 +105,7 @@ func (n *NUMANode) WithBlockDevices(devices []*BlockDevice) *NUMANode { // GetMockFabricScannerConfig gets a FabricScannerConfig for testing. func GetMockFabricScannerConfig() *FabricScannerConfig { return &FabricScannerConfig{ - TopologyProvider: &MockTopologyProvider{}, + TopologyProvider: &MockTopologyProvider{GetTopoReturn: &Topology{}}, FabricInterfaceProviders: []FabricInterfaceProvider{ &MockFabricInterfaceProvider{}, }, @@ -173,6 +175,7 @@ type MockGetNetDevClassResult struct { // MockNetDevClassProvider is a NetDevClassProvider for testing. type MockNetDevClassProvider struct { + mutex sync.Mutex GetNetDevClassReturn []MockGetNetDevClassResult GetNetDevClassCalled int } @@ -182,22 +185,29 @@ func (m *MockNetDevClassProvider) GetNetDevClass(in string) (NetDevClass, error) return 0, nil } + m.mutex.Lock() + defer m.mutex.Unlock() result := m.GetNetDevClassReturn[m.GetNetDevClassCalled%len(m.GetNetDevClassReturn)] - if in != result.ExpInput { + if result.ExpInput != "" && in != result.ExpInput { return 0, errors.Errorf("MOCK: unexpected input %q != %q", in, result.ExpInput) } m.GetNetDevClassCalled++ return result.NDC, result.Err } -type mockFabricInterfaceSetBuilder struct { - buildPartCalled int - buildPartReturn error +// MockFabricInterfaceSetBuilder is a FabricInterfaceSetBuilder for testing. 
+type MockFabricInterfaceSetBuilder struct { + BuildPartCalled int + BuildPartUpdateFis func(*FabricInterfaceSet) + BuildPartReturn error } -func (m *mockFabricInterfaceSetBuilder) BuildPart(_ context.Context, _ *FabricInterfaceSet) error { - m.buildPartCalled++ - return m.buildPartReturn +func (m *MockFabricInterfaceSetBuilder) BuildPart(_ context.Context, fis *FabricInterfaceSet) error { + m.BuildPartCalled++ + if m.BuildPartUpdateFis != nil { + m.BuildPartUpdateFis(fis) + } + return m.BuildPartReturn } // MockNetDevStateResult is a structure for injecting results into MockNetDevStateProvider. @@ -208,11 +218,14 @@ type MockNetDevStateResult struct { // MockNetDevStateProvider is a fake NetDevStateProvider for testing. type MockNetDevStateProvider struct { + sync.Mutex GetStateReturn []MockNetDevStateResult GetStateCalled []string } func (m *MockNetDevStateProvider) GetNetDevState(iface string) (NetDevState, error) { + m.Lock() + defer m.Unlock() m.GetStateCalled = append(m.GetStateCalled, iface) if len(m.GetStateReturn) == 0 { @@ -225,3 +238,34 @@ func (m *MockNetDevStateProvider) GetNetDevState(iface string) (NetDevState, err } return m.GetStateReturn[idx].State, m.GetStateReturn[idx].Err } + +// MockFabricScannerConfig provides parameters for constructing a mock fabric scanner. +type MockFabricScannerConfig struct { + ScanResult *FabricInterfaceSet +} + +// MockFabricScanner generates a mock FabricScanner for testing. +func MockFabricScanner(log logging.Logger, cfg *MockFabricScannerConfig) *FabricScanner { + config := GetMockFabricScannerConfig() + providers := make([]string, 0) + fiList := make([]*FabricInterface, 0) + for _, fi := range cfg.ScanResult.byName { + providers = append(providers, fi.Providers.byName.keys()...) 
+ fiList = append(fiList, fi) + } + builders := []FabricInterfaceSetBuilder{ + &MockFabricInterfaceSetBuilder{ + BuildPartUpdateFis: func(fis *FabricInterfaceSet) { + for _, fi := range fiList { + fis.Update(fi) + } + }, + }, + } + return &FabricScanner{ + log: log, + config: config, + builders: builders, + providers: common.NewStringSet(providers...), + } +} diff --git a/utils/config/daos_agent.yml b/utils/config/daos_agent.yml index 1a25b255042..5f7e396c53c 100644 --- a/utils/config/daos_agent.yml +++ b/utils/config/daos_agent.yml @@ -74,6 +74,12 @@ ## default: false #disable_caching: true +## Automatically expire the agent's remote cache after a period of time defined in +## minutes. It will refresh the data the next time it is requested. +# +## default: 0 (never expires) +#cache_expiration: 30 + ## Ignore a subset of fabric interfaces when selecting an interface for client ## applications. # From 0a0807af08be40e590cfe1ea8e9c614a4ea42bb9 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Tue, 19 Mar 2024 21:48:35 +0000 Subject: [PATCH 28/28] Fix src/rdb/raft reference Required-githooks: true Signed-off-by: Kris Jacque --- src/rdb/raft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rdb/raft b/src/rdb/raft index efa15f46360..12dbc1595fa 160000 --- a/src/rdb/raft +++ b/src/rdb/raft @@ -1 +1 @@ -Subproject commit efa15f46360078ff427562c53d23ed6f0e4a06ac +Subproject commit 12dbc1595fad8b570de1e336205f994f2b0e22f5