diff --git a/CHANGELOG.md b/CHANGELOG.md index ca695e4017e22a674899d2babcbda0bfa72cf033..144824f02f9c77924b70918856676769cd7e2a30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ # IluvatarCorex Device Plugin Changelog ## v4.4.0 +- Mount `/dev/itrctl` for driver 4.4.0 compatibility. - Add support for GPU resetting. -- Add support for GPU hot-plugging. +- Add real-time detection support for GPU remove/rescan. - Fix visible device environment variable issues. -- Fix possible vulnerabilities by updating dependencies. +- Fix possible vulnerabilities by updating dependencies. diff --git a/README.md b/README.md index 4b1f34aec1a633425e1ac0e7656ceb9cda2baaea..901c3c983cf1ab586cf2d2aacfc80764f946c91d 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ data: | Parameter | Default | Description | | --------------------------- | ------------------ | ------------------------------- | | `image.repository` | `ix-device-plugin` | Image repository | -| `image.tag` | `4.4.0` | Image tag | +| `image.tag` | `` | Image tag | | `image.pullPolicy` | `IfNotPresent` | Image pull policy | | `ixConfig.flags.splitboard` | `false` | Enable splitboard mode | | `ixConfig.flags.usevolcano` | `false` | Enable Volcano integration | @@ -70,7 +70,7 @@ data: #### Install with Custom Image ```bash -helm install ix-device-plugin ix-device-plugin-4.4.0.tgz \ +helm install ix-device-plugin ix-device-plugin-4.3.0.tgz \ --set image.repository=registry.local/ix-device-plugin \ --set image.tag=test \ --set image.pullPolicy=Always \ @@ -82,7 +82,7 @@ You can install the `ix-device-plugin` chart in two modes: **with Volcano plugin Enable the `usevolcano` flag: ```bash -helm install ix-device-plugin ix-device-plugin-4.4.0.tgz \ +helm install ix-device-plugin ix-device-plugin-4.3.0.tgz \ --set ixConfig.flags.usevolcano=true \ -n kube-system ``` @@ -121,7 +121,7 @@ spec: drop: - ALL privileged: true - image: "ix-device-plugin:4.4.0" + image: "ix-device-plugin:4.3.0" imagePullPolicy: IfNotPresent 
livenessProbe: exec: @@ -199,7 +199,7 @@ EOF ```shell kubectl logs corex-example +-----------------------------------------------------------------------------+ -| IX-ML: 4.0.0 Driver Version: 4.1.0 CUDA Version: N/A | +| IX-ML: Driver Version: CUDA Version: | |-------------------------------+----------------------+----------------------| | GPU Name | Bus-Id | Clock-SM Clock-Mem | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | diff --git a/pkg/dpm/plugin.go b/pkg/dpm/plugin.go index f3f835966775df144831e56b5df61db9a99071f3..bd68bb9c242f53d8963ce4b3e514620d6f61635d 100644 --- a/pkg/dpm/plugin.go +++ b/pkg/dpm/plugin.go @@ -19,6 +19,7 @@ package dpm import ( "fmt" + "os" "sort" "strconv" "strings" @@ -135,14 +136,14 @@ func (p *iluvatarDevicePlugin) alignedAlloc(available, required []string, size i // Allocate returns list of devices. func (p *iluvatarDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { responses := &pluginapi.AllocateResponse{} - response := &pluginapi.ContainerAllocateResponse{} klog.Infof("Allocate request: %v", reqs) - var deviceIDs []string - var replicaIDs []string var indexes []int for _, req := range reqs.ContainerRequests { + response := &pluginapi.ContainerAllocateResponse{} + var deviceIDs []string + var replicaIDs []string if p.kubeclient != nil { volcanoDevices, isVolcano := p.UseVolcano(req.DevicesIDs) @@ -192,12 +193,13 @@ func (p *iluvatarDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.All replicaIDs = append(replicaIDs, id) } } + response.Devices = append(response.Devices, p.allocateCommonDeviceSpecs()...) 
+ response.Envs = p.allocateEnvs("IX_VISIBLE_DEVICES", deviceIDs) + response.Envs["IX_REPLICA_DEVICES"] = strings.Join(replicaIDs, ",") + responses.ContainerResponses = append(responses.ContainerResponses, response) } - response.Envs = p.allocateEnvs("IX_VISIBLE_DEVICES", deviceIDs) - response.Envs["IX_REPLICA_DEVICES"] = strings.Join(replicaIDs, ",") - klog.Infof("Allocate response: %v", responses) p.resetGpusAndDeviceSet(indexes) @@ -210,6 +212,28 @@ func (p *iluvatarDevicePlugin) allocateEnvs(envvar string, devices []string) map } } +func (p *iluvatarDevicePlugin) allocateCommonDeviceSpecs() []*pluginapi.DeviceSpec { + commonDevices := []string{ + "/dev/itrctl", + } + + var specs []*pluginapi.DeviceSpec + for _, dev := range commonDevices { + if _, err := os.Stat(dev); err != nil { + klog.Warningf("Control device %s not found on host, skipping mount", dev) + continue + } + spec := &pluginapi.DeviceSpec{ + ContainerPath: dev, + HostPath: dev, + Permissions: "rw", + } + specs = append(specs, spec) + } + + return specs +} + func (p *iluvatarDevicePlugin) allocateMountsByDeviceID(deviceID string) *pluginapi.Mount { var mount pluginapi.Mount