Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions MPSUserGuide.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# How to start MPS

1. Start MPS daemon in node
1.1 export CUDA_VISIBLE_DEVICES=ID
> Specify which GPUs should be visible to a CUDA application.

1.2 export CUDA_MPS_PIPE_DIRECTORY=Directory
> The MPS control daemon, the MPS server, and the associated MPS clients communicate with each other via named pipes and UNIX domain sockets.
The default directory for these pipes and sockets is /tmp/nvidia-mps.
CUDA_MPS_PIPE_DIRECTORY can be used to override the location of these pipes and sockets.

1.3 export CUDA_MPS_LOG_DIRECTORY=Directory
> The MPS control daemon maintains a control.log file and server.log file in the directory.

1.4 nvidia-smi -i ID -c EXCLUSIVE_PROCESS
> Three compute modes are supported via settings accessible in nvidia-smi: PROHIBITED, EXCLUSIVE_PROCESS, and DEFAULT. Make sure your GPU is in EXCLUSIVE_PROCESS mode.

1.5 nvidia-cuda-mps-control -d
> start MPS daemon

2. Add additional information in yaml

2.1 set hostIPC=true in podspec
> spec:
hostIPC: true

2.2 add environment information
* CUDA_MPS_ACTIVE_THREAD_PERCENTAGE="number" //0-100
> Setting this in an MPS client’s environment will constrain the portion of available threads of each device.
* CUDA_MPS_PIPE_DIRECTORY=Directory
> Make sure this directory is the same as the one you set on the node.

2.3 add volume information
* volumeMount
> The same as CUDA_MPS_PIPE_DIRECTORY set on node.
* volumes
> hostPath the same as CUDA_MPS_PIPE_DIRECTORY set on node.

2.4 An example of the additional information when CUDA_MPS_PIPE_DIRECTORY=/root/nvidia-mps is set
![example](https://ws3.sinaimg.cn/large/006tNc79ly1g4tqjrvr8rj30ou0f075w.jpg)



3 changes: 2 additions & 1 deletion cmd/nvidia/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ var (
mps = flag.Bool("mps", false, "Enable or Disable MPS")
healthCheck = flag.Bool("health-check", false, "Enable or disable Health check")
memoryUnit = flag.String("memory-unit", "GiB", "Set memoryUnit of the GPU Memroy, support 'GiB' and 'MiB'")
mpspipe = flag.String("mps-pipe", "/tmp/nvidia-mps", " pipes and UNIX domain sockets")
)

func main() {
flag.Parse()
log.V(1).Infoln("Start gpushare device plugin")
ngm := nvidia.NewSharedGPUManager(*mps, *healthCheck, translatememoryUnits(*memoryUnit))
ngm := nvidia.NewSharedGPUManager(*mps, *healthCheck, *mpspipe, translatememoryUnits(*memoryUnit))
err := ngm.Run()
if err != nil {
log.Fatalf("Failed due to %v", err)
Expand Down
2 changes: 2 additions & 0 deletions device-plugin-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ spec:
- gpushare-device-plugin-v2
- -logtostderr
- --v=5
# - --mps-pipe=/root/nvidia-mps // The MPS clients and the MPS server communicate through this directory. You can modify it.
# - --mps=true //if you want to use mps
- --memory-unit=GiB
resources:
limits:
Expand Down
29 changes: 21 additions & 8 deletions pkg/gpu/nvidia/allocate.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,22 @@ func init() {
kubeInit()
}

func buildErrResponse(reqs *pluginapi.AllocateRequest, podReqGPU uint) *pluginapi.AllocateResponse {
func (m *NvidiaDevicePlugin) buildErrResponse(reqs *pluginapi.AllocateRequest, podReqGPU uint) *pluginapi.AllocateResponse {
responses := pluginapi.AllocateResponse{}
for _, req := range reqs.ContainerRequests {
response := pluginapi.ContainerAllocateResponse{
Envs: map[string]string{
envNVGPU: fmt.Sprintf("no-gpu-has-%dMiB-to-run", podReqGPU),
envNVGPU: fmt.Sprintf("no-gpu-has-%dGiB-to-run", podReqGPU),
EnvResourceIndex: fmt.Sprintf("-1"),
EnvResourceByPod: fmt.Sprintf("%d", podReqGPU),
EnvResourceByContainer: fmt.Sprintf("%d", uint(len(req.DevicesIDs))),
EnvResourceByDev: fmt.Sprintf("%d", getGPUMemory()),
},
}
if m.mps {
response.Envs[EnvMPSActiveThreadPercentage] = fmt.Sprintf("%d", 100*uint(len(req.DevicesIDs))/getGPUMemory())
response.Envs[EnvMPSPipeDirectory] = fmt.Sprintf(m.mpspipe)
}
responses.ContainerResponses = append(responses.ContainerResponses, &response)
}
return &responses
Expand Down Expand Up @@ -62,7 +66,7 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
pods, err := getCandidatePods()
if err != nil {
log.Infof("invalid allocation requst: Failed to find candidate pods due to %v", err)
return buildErrResponse(reqs, podReqGPU), nil
return m.buildErrResponse(reqs, podReqGPU), nil
}

if log.V(4) {
Expand Down Expand Up @@ -106,7 +110,7 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
}

if id < 0 {
return buildErrResponse(reqs, podReqGPU), nil
return m.buildErrResponse(reqs, podReqGPU), nil
}

// 1. Create container requests
Expand All @@ -121,6 +125,15 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
EnvResourceByDev: fmt.Sprintf("%d", getGPUMemory()),
},
}
if m.mps {
response.Envs[EnvMPSActiveThreadPercentage] = fmt.Sprintf("%d", 100*reqGPU/getGPUMemory())
response.Envs[EnvMPSPipeDirectory] = fmt.Sprintf(m.mpspipe)
mount := pluginapi.Mount{
ContainerPath: m.mpspipe,
HostPath: m.mpspipe,
}
response.Mounts = append(response.Mounts, &mount)
}
responses.ContainerResponses = append(responses.ContainerResponses, &response)
}

Expand All @@ -134,25 +147,25 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
pod, err := clientset.CoreV1().Pods(assumePod.Namespace).Get(assumePod.Name, metav1.GetOptions{})
if err != nil {
log.Warningf("Failed due to %v", err)
return buildErrResponse(reqs, podReqGPU), nil
return m.buildErrResponse(reqs, podReqGPU), nil
}
newPod = updatePodAnnotations(pod)
_, err = clientset.CoreV1().Pods(newPod.Namespace).Update(newPod)
if err != nil {
log.Warningf("Failed due to %v", err)
return buildErrResponse(reqs, podReqGPU), nil
return m.buildErrResponse(reqs, podReqGPU), nil
}
} else {
log.Warningf("Failed due to %v", err)
return buildErrResponse(reqs, podReqGPU), nil
return m.buildErrResponse(reqs, podReqGPU), nil
}
}

} else {
log.Warningf("invalid allocation requst: request GPU memory %d can't be satisfied.",
podReqGPU)
// return &responses, fmt.Errorf("invalid allocation requst: request GPU memory %d can't be satisfied", reqGPU)
return buildErrResponse(reqs, podReqGPU), nil
return m.buildErrResponse(reqs, podReqGPU), nil
}

log.Infof("new allocated GPUs info %v", &responses)
Expand Down
18 changes: 10 additions & 8 deletions pkg/gpu/nvidia/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,16 @@ const (
containerLogPathLabelKey = "io.kubernetes.container.logpath"
sandboxIDLabelKey = "io.kubernetes.sandbox.id"

envNVGPU = "NVIDIA_VISIBLE_DEVICES"
EnvResourceIndex = "ALIYUN_COM_GPU_MEM_IDX"
EnvResourceByPod = "ALIYUN_COM_GPU_MEM_POD"
EnvResourceByContainer = "ALIYUN_COM_GPU_MEM_CONTAINER"
EnvResourceByDev = "ALIYUN_COM_GPU_MEM_DEV"
EnvAssignedFlag = "ALIYUN_COM_GPU_MEM_ASSIGNED"
EnvResourceAssumeTime = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
EnvResourceAssignTime = "ALIYUN_COM_GPU_MEM_ASSIGN_TIME"
envNVGPU = "NVIDIA_VISIBLE_DEVICES"
EnvResourceIndex = "ALIYUN_COM_GPU_MEM_IDX"
EnvResourceByPod = "ALIYUN_COM_GPU_MEM_POD"
EnvResourceByContainer = "ALIYUN_COM_GPU_MEM_CONTAINER"
EnvResourceByDev = "ALIYUN_COM_GPU_MEM_DEV"
EnvAssignedFlag = "ALIYUN_COM_GPU_MEM_ASSIGNED"
EnvResourceAssumeTime = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
EnvMPSPipeDirectory = "CUDA_MPS_PIPE_DIRECTORY"
EnvMPSActiveThreadPercentage = "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"
EnvResourceAssignTime = "ALIYUN_COM_GPU_MEM_ASSIGN_TIME"

GiBPrefix = MemoryUnit("GiB")
MiBPrefix = MemoryUnit("MiB")
Expand Down
6 changes: 4 additions & 2 deletions pkg/gpu/nvidia/gpumanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ import (
// sharedGPUManager holds the node-level options used to create (and
// re-create, on restart) the NVIDIA device plugin.
type sharedGPUManager struct {
enableMPS bool // enable CUDA MPS support for allocated containers
healthCheck bool // enable device health checking
mpspipe string // MPS pipe/socket directory passed through to the device plugin
}

func NewSharedGPUManager(enableMPS, healthCheck bool, bp MemoryUnit) *sharedGPUManager {
// NewSharedGPUManager builds a sharedGPUManager from the command-line
// options. mpspipe is the directory used for MPS named pipes and UNIX
// domain sockets. bp is the memory unit used when reporting GPU memory
// (flags elsewhere suggest "GiB" or "MiB" — confirm with translatememoryUnits).
func NewSharedGPUManager(enableMPS, healthCheck bool, mpspipe string, bp MemoryUnit) *sharedGPUManager {
metric = bp // NOTE(review): assigns what appears to be a package-level var — global side effect
return &sharedGPUManager{
enableMPS: enableMPS,
healthCheck: healthCheck,
mpspipe: mpspipe,
}
}

Expand Down Expand Up @@ -61,7 +63,7 @@ L:
devicePlugin.Stop()
}

devicePlugin = NewNvidiaDevicePlugin(ngm.enableMPS, ngm.healthCheck)
devicePlugin = NewNvidiaDevicePlugin(ngm.enableMPS, ngm.healthCheck, ngm.mpspipe)
if err := devicePlugin.Serve(); err != nil {
log.Warningf("Failed to start device plugin due to %v", err)
} else {
Expand Down
9 changes: 5 additions & 4 deletions pkg/gpu/nvidia/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type NvidiaDevicePlugin struct {
devIndxMap map[uint]string
socket string
mps bool
mpspipe string
healthCheck bool

stop chan struct{}
Expand All @@ -32,7 +33,7 @@ type NvidiaDevicePlugin struct {
}

// NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
func NewNvidiaDevicePlugin(mps, healthCheck bool) *NvidiaDevicePlugin {
func NewNvidiaDevicePlugin(mps, healthCheck bool, mpspipe string) *NvidiaDevicePlugin {
devs, devNameMap := getDevices()
devList := []string{}

Expand All @@ -54,10 +55,10 @@ func NewNvidiaDevicePlugin(mps, healthCheck bool) *NvidiaDevicePlugin {
devNameMap: devNameMap,
socket: serverSock,
mps: mps,
mpspipe: mpspipe,
healthCheck: healthCheck,

stop: make(chan struct{}),
health: make(chan *pluginapi.Device),
stop: make(chan struct{}),
health: make(chan *pluginapi.Device),
}
}

Expand Down