Skip to content

Commit

Permalink
Update vGPUmonitor to add dynamic adjustment on core and memory limit (#624)
Browse files Browse the repository at this point in the history

* update ci and initdevices logic

Signed-off-by: limengxuan <[email protected]>

* prepare to update v2.4.1

Signed-off-by: limengxuan <[email protected]>

* update vGPUmonitor to add dynamic adjustment on core and memory limit

Signed-off-by: limengxuan <[email protected]>

---------

Signed-off-by: limengxuan <[email protected]>
  • Loading branch information
archlitchi authored Nov 18, 2024
1 parent 5396897 commit 6545472
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/monitor/nvidia/cudevshr.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ type UsageInfo interface {
DeviceMemoryOffset(idx int) uint64
DeviceMemoryTotal(idx int) uint64
DeviceSmUtil(idx int) uint64
SetDeviceSmLimit(l uint64)
IsValidUUID(idx int) bool
DeviceUUID(idx int) string
DeviceMemoryLimit(idx int) uint64
SetDeviceMemoryLimit(l uint64)
LastKernelTime() int64
//UsedMemory(idx int) (uint64, error)
GetPriority() int
Expand Down
16 changes: 16 additions & 0 deletions pkg/monitor/nvidia/v0/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ func (s Spec) DeviceSmUtil(idx int) uint64 {
return v
}

// SetDeviceSmLimit stores l in the smLimit entry of every device counted by
// the region's num field, so all of those devices end up with the same value.
//
// The device count is read from the region itself. It is clamped to the
// capacity of the smLimit table so that a count larger than the table cannot
// index past its end and panic.
// NOTE(review): presumably l is an SM (core) utilization limit — confirm the
// unit against the callers.
func (s Spec) SetDeviceSmLimit(l uint64) {
	count := s.sr.num
	if capacity := uint64(len(s.sr.smLimit)); count > capacity {
		count = capacity
	}
	for i := uint64(0); i < count; i++ {
		s.sr.smLimit[i] = l
	}
}

// IsValidUUID reports whether the UUID entry at position idx has a non-zero
// first byte. Only that first byte is inspected; the remaining bytes are not
// examined.
func (s Spec) IsValidUUID(idx int) bool {
	firstByte := s.sr.uuids[idx].uuid[0]
	return firstByte != 0
}
Expand All @@ -141,6 +149,14 @@ func (s Spec) DeviceMemoryLimit(idx int) uint64 {
return s.sr.limit[idx]
}

// SetDeviceMemoryLimit stores l in the limit entry of every device counted by
// the region's num field, so all of those devices end up with the same value.
//
// The device count is read from the region itself. It is clamped to the
// capacity of the limit table so that a count larger than the table cannot
// index past its end and panic.
// NOTE(review): presumably l is a device memory limit in bytes — confirm the
// unit against the callers.
func (s Spec) SetDeviceMemoryLimit(l uint64) {
	count := s.sr.num
	if capacity := uint64(len(s.sr.limit)); count > capacity {
		count = capacity
	}
	for i := uint64(0); i < count; i++ {
		s.sr.limit[i] = l
	}
}

// LastKernelTime always reports 0 for this version of the spec.
// NOTE(review): presumably this layout does not record a last-kernel
// timestamp — confirm against the region definition.
func (s Spec) LastKernelTime() int64 {
	const noKernelTime int64 = 0
	return noKernelTime
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/monitor/nvidia/v1/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ func (s Spec) DeviceSmUtil(idx int) uint64 {
return v
}

// SetDeviceSmLimit stores l in the smLimit entry of every device counted by
// the region's num field, so all of those devices end up with the same value.
//
// The device count is read from the region itself. It is clamped to the
// capacity of the smLimit table so that a count larger than the table cannot
// index past its end and panic.
// NOTE(review): presumably l is an SM (core) utilization limit — confirm the
// unit against the callers.
func (s Spec) SetDeviceSmLimit(l uint64) {
	devices := s.sr.num
	if slots := uint64(len(s.sr.smLimit)); devices > slots {
		devices = slots
	}
	for i := uint64(0); i < devices; i++ {
		s.sr.smLimit[i] = l
	}
}

// IsValidUUID reports whether the UUID entry at position idx has a non-zero
// first byte. Only that first byte is inspected; the remaining bytes are not
// examined.
func (s Spec) IsValidUUID(idx int) bool {
	lead := s.sr.uuids[idx].uuid[0]
	if lead == 0 {
		return false
	}
	return true
}
Expand All @@ -148,6 +156,14 @@ func (s Spec) DeviceMemoryLimit(idx int) uint64 {
return s.sr.limit[idx]
}

// SetDeviceMemoryLimit stores l in the limit entry of every device counted by
// the region's num field, so all of those devices end up with the same value.
//
// The device count is read from the region itself. It is clamped to the
// capacity of the limit table so that a count larger than the table cannot
// index past its end and panic.
// NOTE(review): presumably l is a device memory limit in bytes — confirm the
// unit against the callers.
func (s Spec) SetDeviceMemoryLimit(l uint64) {
	devices := s.sr.num
	if slots := uint64(len(s.sr.limit)); devices > slots {
		devices = slots
	}
	for i := uint64(0); i < devices; i++ {
		s.sr.limit[i] = l
	}
}

// LastKernelTime returns the lastKernelTime value currently held in the
// region, unmodified.
// NOTE(review): presumably a timestamp written by the monitored process —
// confirm the unit and who updates it.
func (s Spec) LastKernelTime() int64 {
	region := s.sr
	return region.lastKernelTime
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/scheduler/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ func (m *podManager) addPod(pod *corev1.Pod, nodeID string, devices util.PodDevi
pi := &podInfo{Name: pod.Name, UID: pod.UID, Namespace: pod.Namespace, NodeID: nodeID, Devices: devices}
m.pods[pod.UID] = pi
klog.Infof("Pod added: Name: %s, UID: %s, Namespace: %s, NodeID: %s", pod.Name, pod.UID, pod.Namespace, nodeID)
} else {
m.pods[pod.UID].Devices = devices
}
}

Expand Down

0 comments on commit 6545472

Please sign in to comment.