diff --git a/crds/embedded/virtualmachineinstances.yaml b/crds/embedded/virtualmachineinstances.yaml index c6624ebef..196797aa2 100644 --- a/crds/embedded/virtualmachineinstances.yaml +++ b/crds/embedded/virtualmachineinstances.yaml @@ -3076,6 +3076,8 @@ spec: path: description: Path defines the path to disk file in the container type: string + hotpluggable: + type: boolean required: - image type: object diff --git a/crds/embedded/virtualmachines.yaml b/crds/embedded/virtualmachines.yaml index d4774864c..b99399f64 100644 --- a/crds/embedded/virtualmachines.yaml +++ b/crds/embedded/virtualmachines.yaml @@ -3868,6 +3868,8 @@ spec: description: Path defines the path to disk file in the container type: string + hotpluggable: + type: boolean required: - image type: object @@ -4491,6 +4493,35 @@ spec: description: VolumeSource represents the source of the volume to map to the disk. properties: + containerDisk: + type: object + description: |- + ContainerDisk references a docker image, embedding a qcow or raw disk. + More info: https://kubevirt.gitbooks.io/user-guide/registry-disk.html + properties: + image: + description: Image is the name of the image with + the embedded disk. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + imagePullSecret: + description: ImagePullSecret is the name of the + Docker registry secret required to pull the image. + The secret must already exist. + type: string + path: + description: Path defines the path to disk file + in the container + type: string + hotpluggable: + type: boolean dataVolume: description: |- DataVolume represents the dynamic creation a PVC for this volume as well as diff --git a/images/virt-artifact/patches/032-hotplug-container-disk.patch b/images/virt-artifact/patches/032-hotplug-container-disk.patch new file mode 100644 index 000000000..ec7c4e62f --- /dev/null +++ b/images/virt-artifact/patches/032-hotplug-container-disk.patch @@ -0,0 +1,2291 @@ +diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json +index c4822a0448..d6bb534249 100644 +--- a/api/openapi-spec/swagger.json ++++ b/api/openapi-spec/swagger.json +@@ -12951,6 +12951,9 @@ + "image" + ], + "properties": { ++ "hotpluggable": { ++ "type": "boolean" ++ }, + "image": { + "description": "Image is the name of the image with the embedded disk.", + "type": "string", +@@ -13973,6 +13976,9 @@ + "description": "HotplugVolumeSource Represents the source of a volume to mount which are capable of being hotplugged on a live running VMI. 
Only one of its members may be specified.", + "type": "object", + "properties": { ++ "containerDisk": { ++ "$ref": "#/definitions/v1.ContainerDiskSource" ++ }, + "dataVolume": { + "description": "DataVolume represents the dynamic creation a PVC for this volume as well as the process of populating that PVC with a disk image.", + "$ref": "#/definitions/v1.DataVolumeSource" +diff --git a/cmd/container-disk-v2alpha/main.c b/cmd/container-disk-v2alpha/main.c +index ba855e574b..8ed76d8710 100644 +--- a/cmd/container-disk-v2alpha/main.c ++++ b/cmd/container-disk-v2alpha/main.c +@@ -179,4 +179,4 @@ int main(int argc, char **argv) { + } + + socket_check(fd, (void *)copy_path); +-} ++} +\ No newline at end of file +diff --git a/cmd/virt-chroot/main.go b/cmd/virt-chroot/main.go +index e28daa07c7..7a69b7451b 100644 +--- a/cmd/virt-chroot/main.go ++++ b/cmd/virt-chroot/main.go +@@ -20,6 +20,7 @@ var ( + cpuTime uint64 + memoryBytes uint64 + targetUser string ++ targetUserID int + ) + + func init() { +@@ -51,7 +52,12 @@ func main() { + + // Looking up users needs resources, let's do it before we set rlimits. + var u *user.User +- if targetUser != "" { ++ if targetUserID >= 0 { ++ _, _, errno := syscall.Syscall(syscall.SYS_SETUID, uintptr(targetUserID), 0, 0) ++ if errno != 0 { ++ return fmt.Errorf("failed to switch to user: %d. errno: %d", targetUserID, errno) ++ } ++ } else if targetUser != "" { + var err error + u, err = user.Lookup(targetUser) + if err != nil { +@@ -116,6 +122,7 @@ func main() { + rootCmd.PersistentFlags().Uint64Var(&memoryBytes, "memory", 0, "memory in bytes for the process") + rootCmd.PersistentFlags().StringVar(&mntNamespace, "mount", "", "mount namespace to use") + rootCmd.PersistentFlags().StringVar(&targetUser, "user", "", "switch to this targetUser to e.g. drop privileges") ++ rootCmd.PersistentFlags().IntVar(&targetUserID, "userid", -1, "switch to this targetUser to e.g. 
drop privileges") + + execCmd := &cobra.Command{ + Use: "exec", +@@ -136,16 +143,39 @@ func main() { + Args: cobra.MinimumNArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + var mntOpts uint = 0 ++ var dataOpts []string + + fsType := cmd.Flag("type").Value.String() + mntOptions := cmd.Flag("options").Value.String() ++ var ( ++ uid = -1 ++ gid = -1 ++ ) + for _, opt := range strings.Split(mntOptions, ",") { + opt = strings.TrimSpace(opt) +- switch opt { +- case "ro": ++ switch { ++ case opt == "ro": + mntOpts = mntOpts | syscall.MS_RDONLY +- case "bind": ++ case opt == "bind": + mntOpts = mntOpts | syscall.MS_BIND ++ case opt == "remount": ++ mntOpts = mntOpts | syscall.MS_REMOUNT ++ case strings.HasPrefix(opt, "uid="): ++ uidS := strings.TrimPrefix(opt, "uid=") ++ uidI, err := strconv.Atoi(uidS) ++ if err != nil { ++ return fmt.Errorf("failed to parse uid: %w", err) ++ } ++ uid = uidI ++ dataOpts = append(dataOpts, opt) ++ case strings.HasPrefix(opt, "gid="): ++ gidS := strings.TrimPrefix(opt, "gid=") ++ gidI, err := strconv.Atoi(gidS) ++ if err != nil { ++ return fmt.Errorf("failed to parse gid: %w", err) ++ } ++ gid = gidI ++ dataOpts = append(dataOpts, opt) + default: + return fmt.Errorf("mount option %s is not supported", opt) + } +@@ -168,8 +198,17 @@ func main() { + return fmt.Errorf("mount target invalid: %v", err) + } + defer targetFile.Close() +- +- return syscall.Mount(sourceFile.SafePath(), targetFile.SafePath(), fsType, uintptr(mntOpts), "") ++ if uid >= 0 && gid >= 0 { ++ err = os.Chown(targetFile.SafePath(), uid, gid) ++ if err != nil { ++ return fmt.Errorf("chown target failed: %w", err) ++ } ++ } ++ var data string ++ if len(dataOpts) > 0 { ++ data = strings.Join(dataOpts, ",") ++ } ++ return syscall.Mount(sourceFile.SafePath(), targetFile.SafePath(), fsType, uintptr(mntOpts), data) + }, + } + mntCmd.Flags().StringP("options", "o", "", "comma separated list of mount options") +diff --git a/manifests/generated/kv-resource.yaml b/manifests/generated/kv-resource.yaml +index 66d1b01dbf..43e36b7195 100644 +--- a/manifests/generated/kv-resource.yaml ++++ b/manifests/generated/kv-resource.yaml +@@ -3307,9 +3307,6 @@ spec: + - jsonPath: .status.phase + name: Phase + type: string +- deprecated: true +- deprecationWarning: kubevirt.io/v1alpha3 is now deprecated and will be removed +- in a future release. 
+ name: v1alpha3 + schema: + openAPIV3Schema: +diff --git a/manifests/generated/operator-csv.yaml.in b/manifests/generated/operator-csv.yaml.in +index 400d118024..05ee099c67 100644 +--- a/manifests/generated/operator-csv.yaml.in ++++ b/manifests/generated/operator-csv.yaml.in +@@ -605,6 +605,13 @@ spec: + - '*' + verbs: + - '*' ++ - apiGroups: ++ - subresources.virtualization.deckhouse.io ++ resources: ++ - virtualmachines/addvolume ++ - virtualmachines/removevolume ++ verbs: ++ - update + - apiGroups: + - subresources.kubevirt.io + resources: +diff --git a/manifests/generated/rbac-operator.authorization.k8s.yaml.in b/manifests/generated/rbac-operator.authorization.k8s.yaml.in +index 10dbb92269..1ccc9e9fa7 100644 +--- a/manifests/generated/rbac-operator.authorization.k8s.yaml.in ++++ b/manifests/generated/rbac-operator.authorization.k8s.yaml.in +@@ -143,7 +143,7 @@ kind: RoleBinding + metadata: + labels: + kubevirt.io: "" +- name: kubevirt-operator-rolebinding ++ name: kubevirt-operator + namespace: {{.Namespace}} + roleRef: + apiGroup: rbac.authorization.k8s.io +@@ -607,6 +607,13 @@ rules: + - '*' + verbs: + - '*' ++- apiGroups: ++ - subresources.virtualization.deckhouse.io ++ resources: ++ - virtualmachines/addvolume ++ - virtualmachines/removevolume ++ verbs: ++ - update + - apiGroups: + - subresources.kubevirt.io + resources: +diff --git a/pkg/container-disk/container-disk.go b/pkg/container-disk/container-disk.go +index 3251d04787..83ecab813d 100644 +--- a/pkg/container-disk/container-disk.go ++++ b/pkg/container-disk/container-disk.go +@@ -30,6 +30,7 @@ import ( + + kubev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" ++ "k8s.io/apimachinery/pkg/types" + + "kubevirt.io/kubevirt/pkg/safepath" + +@@ -47,8 +48,10 @@ var containerDiskOwner = "qemu" + var podsBaseDir = util.KubeletPodsDir + + var mountBaseDir = filepath.Join(util.VirtShareDir, "/container-disks") ++var hotplugBaseDir = filepath.Join(util.VirtShareDir, "/hotplug-disks") + + type SocketPathGetter func(vmi *v1.VirtualMachineInstance, volumeIndex int) (string, error) ++type HotplugSocketPathGetter func(vmi *v1.VirtualMachineInstance, volumeName string, sourceUID types.UID) (string, error) + type KernelBootSocketPathGetter func(vmi *v1.VirtualMachineInstance) (string, error) + + const KernelBootName = "kernel-boot" +@@ -107,6 +110,10 @@ func GetDiskTargetPathFromLauncherView(volumeIndex int) string { + return filepath.Join(mountBaseDir, GetDiskTargetName(volumeIndex)) + } + ++func GetHotplugContainerDiskTargetPathFromLauncherView(volumeName string) string { ++ return filepath.Join(hotplugBaseDir, fmt.Sprintf("%s.img", volumeName)) ++} ++ + func GetKernelBootArtifactPathFromLauncherView(artifact string) string { + artifactBase := filepath.Base(artifact) + return filepath.Join(mountBaseDir, KernelBootName, artifactBase) +@@ -170,6 +177,27 @@ func NewSocketPathGetter(baseDir string) SocketPathGetter { + } + } + ++func NewHotplugSocketPathGetter(baseDir string) HotplugSocketPathGetter { ++ return func(vmi *v1.VirtualMachineInstance, volumeName string, sourceUID types.UID) (string, error) { ++ for _, v := range vmi.Status.VolumeStatus { ++ if v.Name == volumeName && v.HotplugVolume != nil && v.ContainerDiskVolume != nil { ++ uid := string(sourceUID) ++ if uid == "" { ++ uid = string(v.HotplugVolume.AttachPodUID) ++ } ++ basePath := getHotplugContainerDiskSocketBasePath(baseDir, uid) ++ socketPath := filepath.Join(basePath, fmt.Sprintf("hotplug-container-disk-%s.sock", volumeName)) ++ exists, _ := 
diskutils.FileExists(socketPath) ++ if exists { ++ return socketPath, nil ++ } ++ } ++ } ++ ++ return "", fmt.Errorf("container disk socket path not found for vmi \"%s\"", vmi.Name) ++ } ++} ++ + // NewKernelBootSocketPathGetter get the socket pat of the kernel-boot containerDisk. For testing a baseDir + // can be provided which can for instance point to /tmp. + func NewKernelBootSocketPathGetter(baseDir string) KernelBootSocketPathGetter { +@@ -278,7 +306,7 @@ func generateContainersHelper(vmi *v1.VirtualMachineInstance, config *virtconfig + } + + func generateContainerFromVolume(vmi *v1.VirtualMachineInstance, config *virtconfig.ClusterConfig, imageIDs map[string]string, podVolumeName, binVolumeName string, isInit, isKernelBoot bool, volume *v1.Volume, volumeIdx int) *kubev1.Container { +- if volume.ContainerDisk == nil { ++ if volume.ContainerDisk == nil || volume.ContainerDisk.Hotpluggable { + return nil + } + +@@ -378,11 +406,34 @@ func CreateEphemeralImages( + // for each disk that requires it. + + for i, volume := range vmi.Spec.Volumes { +- if volume.VolumeSource.ContainerDisk != nil { ++ if volume.VolumeSource.ContainerDisk != nil && !volume.VolumeSource.ContainerDisk.Hotpluggable { ++ info, _ := disksInfo[volume.Name] ++ if info == nil { ++ return fmt.Errorf("no disk info provided for volume %s", volume.Name) ++ } ++ if backingFile, err := GetDiskTargetPartFromLauncherView(i); err != nil { ++ return err ++ } else if err := diskCreator.CreateBackedImageForVolume(volume, backingFile, info.Format); err != nil { ++ return err ++ } ++ } ++ } ++ ++ return nil ++} ++ ++func CreateEphemeralImagesForHotplug( ++ vmi *v1.VirtualMachineInstance, ++ diskCreator ephemeraldisk.EphemeralDiskCreatorInterface, ++ disksInfo map[string]*DiskInfo, ++) error { ++ for i, volume := range vmi.Spec.Volumes { ++ if volume.VolumeSource.ContainerDisk != nil && volume.VolumeSource.ContainerDisk.Hotpluggable { + info, _ := disksInfo[volume.Name] + if info == nil { + return fmt.Errorf("no disk info provided for volume %s", volume.Name) + } ++ + if backingFile, err := GetDiskTargetPartFromLauncherView(i); err != nil { + return err + } else if err := diskCreator.CreateBackedImageForVolume(volume, backingFile, info.Format); err != nil { +@@ -398,6 +449,10 @@ func getContainerDiskSocketBasePath(baseDir, podUID string) string { + return fmt.Sprintf("%s/pods/%s/volumes/kubernetes.io~empty-dir/container-disks", baseDir, podUID) + } + ++func getHotplugContainerDiskSocketBasePath(baseDir, podUID string) string { ++ return fmt.Sprintf("%s/pods/%s/volumes/kubernetes.io~empty-dir/hotplug-container-disks", baseDir, podUID) ++} ++ + // ExtractImageIDsFromSourcePod takes the VMI and its source pod to determine the exact image used by containerdisks and boot container images, + // which is recorded in the status section of a started pod; if the status section does not contain this info the tag is used. 
+ // It returns a map where the key is the vlume name and the value is the imageID +diff --git a/pkg/container-disk/util.go b/pkg/container-disk/util.go +new file mode 100644 +index 0000000000..293ca1d9b4 +--- /dev/null ++++ b/pkg/container-disk/util.go +@@ -0,0 +1,10 @@ ++package containerdisk ++ ++import virtv1 "kubevirt.io/api/core/v1" ++ ++func IsHotplugContainerDisk(v *virtv1.Volume) bool { ++ return v != nil && IsHotplugContainerDiskSource(v.VolumeSource) ++} ++func IsHotplugContainerDiskSource(vs virtv1.VolumeSource) bool { ++ return vs.ContainerDisk != nil && vs.ContainerDisk.Hotpluggable ++} +diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go +index 490cc445ef..96c48937e7 100644 +--- a/pkg/controller/controller.go ++++ b/pkg/controller/controller.go +@@ -38,6 +38,8 @@ import ( + v1 "kubevirt.io/api/core/v1" + "kubevirt.io/client-go/log" + cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1" ++ ++ container_disk "kubevirt.io/kubevirt/pkg/container-disk" + ) + + const ( +@@ -278,6 +280,10 @@ func ApplyVolumeRequestOnVMISpec(vmiSpec *v1.VirtualMachineInstanceSpec, request + dvSource := request.AddVolumeOptions.VolumeSource.DataVolume.DeepCopy() + dvSource.Hotpluggable = true + newVolume.VolumeSource.DataVolume = dvSource ++ } else if request.AddVolumeOptions.VolumeSource.ContainerDisk != nil { ++ containerDiskSource := request.AddVolumeOptions.VolumeSource.ContainerDisk.DeepCopy() ++ containerDiskSource.Hotpluggable = true ++ newVolume.VolumeSource.ContainerDisk = containerDiskSource + } + + vmiSpec.Volumes = append(vmiSpec.Volumes, newVolume) +@@ -444,6 +450,9 @@ func VMIHasHotplugVolumes(vmi *v1.VirtualMachineInstance) bool { + if volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.Hotpluggable { + return true + } ++ if volume.ContainerDisk != nil && volume.ContainerDisk.Hotpluggable { ++ return true ++ } + } + return false + } +@@ -557,7 +566,7 @@ func GetHotplugVolumes(vmi *v1.VirtualMachineInstance, virtlauncherPod *k8sv1.Po + podVolumeMap[podVolume.Name] = podVolume + } + for _, vmiVolume := range vmiVolumes { +- if _, ok := podVolumeMap[vmiVolume.Name]; !ok && (vmiVolume.DataVolume != nil || vmiVolume.PersistentVolumeClaim != nil || vmiVolume.MemoryDump != nil) { ++ if _, ok := podVolumeMap[vmiVolume.Name]; !ok && (vmiVolume.DataVolume != nil || vmiVolume.PersistentVolumeClaim != nil || vmiVolume.MemoryDump != nil || container_disk.IsHotplugContainerDisk(&vmiVolume)) { + hotplugVolumes = append(hotplugVolumes, vmiVolume.DeepCopy()) + } + } +diff --git a/pkg/storage/types/pvc.go b/pkg/storage/types/pvc.go +index c8f387db16..81c80c260f 100644 +--- a/pkg/storage/types/pvc.go ++++ b/pkg/storage/types/pvc.go +@@ -144,6 +144,9 @@ func GetPVCsFromVolumes(volumes []virtv1.Volume) map[string]string { + func VirtVolumesToPVCMap(volumes []*virtv1.Volume, pvcStore cache.Store, namespace string) (map[string]*k8sv1.PersistentVolumeClaim, error) { + volumeNamesPVCMap := make(map[string]*k8sv1.PersistentVolumeClaim) + for _, volume := range volumes { ++ if volume.ContainerDisk != nil { ++ continue ++ } + claimName := PVCNameFromVirtVolume(volume) + if claimName == "" { + return nil, fmt.Errorf("volume %s is not a PVC or Datavolume", volume.Name) +@@ -280,6 +283,9 @@ func IsHotplugVolume(vol *virtv1.Volume) bool { + if volSrc.MemoryDump != nil && volSrc.MemoryDump.PersistentVolumeClaimVolumeSource.Hotpluggable { + return true + } ++ if volSrc.ContainerDisk != nil && volSrc.ContainerDisk.Hotpluggable { ++ return true ++ } + + return 
false + } +diff --git a/pkg/virt-api/rest/subresource.go b/pkg/virt-api/rest/subresource.go +index b5d62f5af5..6a3e2b4143 100644 +--- a/pkg/virt-api/rest/subresource.go ++++ b/pkg/virt-api/rest/subresource.go +@@ -1004,7 +1004,9 @@ func addVolumeRequestExists(request v1.VirtualMachineVolumeRequest, name string) + } + + func volumeHotpluggable(volume v1.Volume) bool { +- return (volume.DataVolume != nil && volume.DataVolume.Hotpluggable) || (volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.Hotpluggable) ++ return (volume.DataVolume != nil && volume.DataVolume.Hotpluggable) || ++ (volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.Hotpluggable) || ++ (volume.ContainerDisk != nil && volume.ContainerDisk.Hotpluggable) + } + + func volumeNameExists(volume v1.Volume, volumeName string) bool { +@@ -1018,12 +1020,16 @@ func volumeSourceName(volumeSource *v1.HotplugVolumeSource) string { + if volumeSource.PersistentVolumeClaim != nil { + return volumeSource.PersistentVolumeClaim.ClaimName + } ++ if volumeSource.ContainerDisk != nil { ++ return volumeSource.ContainerDisk.Image ++ } + return "" + } + + func volumeSourceExists(volume v1.Volume, volumeName string) bool { + return (volume.DataVolume != nil && volume.DataVolume.Name == volumeName) || +- (volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName == volumeName) ++ (volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName == volumeName) || ++ (volume.ContainerDisk != nil && volume.ContainerDisk.Image == volumeName) + } + + func volumeExists(volume v1.Volume, volumeName string) bool { +@@ -1125,6 +1131,8 @@ func (app *SubresourceAPIApp) addVolumeRequestHandler(request *restful.Request, + opts.VolumeSource.DataVolume.Hotpluggable = true + } else if opts.VolumeSource.PersistentVolumeClaim != nil { + opts.VolumeSource.PersistentVolumeClaim.Hotpluggable = true ++ } else if opts.VolumeSource.ContainerDisk != nil { ++ opts.VolumeSource.ContainerDisk.Hotpluggable = true + } + + // inject into VMI if ephemeral, else set as a request on the VM to both make permanent and hotplug. 
+diff --git a/pkg/virt-api/webhooks/validating-webhook/admitters/vmi-update-admitter.go b/pkg/virt-api/webhooks/validating-webhook/admitters/vmi-update-admitter.go +index 0af25f8074..b984ff4262 100644 +--- a/pkg/virt-api/webhooks/validating-webhook/admitters/vmi-update-admitter.go ++++ b/pkg/virt-api/webhooks/validating-webhook/admitters/vmi-update-admitter.go +@@ -200,11 +200,11 @@ func verifyHotplugVolumes(newHotplugVolumeMap, oldHotplugVolumeMap map[string]v1 + } + } else { + // This is a new volume, ensure that the volume is either DV, PVC or memoryDumpVolume +- if v.DataVolume == nil && v.PersistentVolumeClaim == nil && v.MemoryDump == nil { ++ if v.DataVolume == nil && v.PersistentVolumeClaim == nil && v.MemoryDump == nil && v.ContainerDisk == nil { + return webhookutils.ToAdmissionResponse([]metav1.StatusCause{ + { + Type: metav1.CauseTypeFieldValueInvalid, +- Message: fmt.Sprintf("volume %s is not a PVC or DataVolume", k), ++ Message: fmt.Sprintf("volume %s is not a PVC,DataVolume,MemoryDumpVolume or ContainerDisk", k), + }, + }) + } +@@ -219,19 +219,19 @@ func verifyHotplugVolumes(newHotplugVolumeMap, oldHotplugVolumeMap map[string]v1 + }) + } + disk := newDisks[k] +- if disk.Disk == nil && disk.LUN == nil { ++ if disk.Disk == nil && disk.LUN == nil && disk.CDRom == nil { + return webhookutils.ToAdmissionResponse([]metav1.StatusCause{ + { + Type: metav1.CauseTypeFieldValueInvalid, +- Message: fmt.Sprintf("Disk %s requires diskDevice of type 'disk' or 'lun' to be hotplugged.", k), ++ Message: fmt.Sprintf("Disk %s requires diskDevice of type 'disk','lun' or cdrom to be hotplugged.", k), + }, + }) + } +- if (disk.Disk == nil || disk.Disk.Bus != "scsi") && (disk.LUN == nil || disk.LUN.Bus != "scsi") { ++ if (disk.Disk != nil && disk.Disk.Bus != v1.DiskBusSCSI) || (disk.LUN != nil && disk.LUN.Bus != v1.DiskBusSCSI) || (disk.CDRom != nil && disk.CDRom.Bus != v1.DiskBusSCSI) { + return webhookutils.ToAdmissionResponse([]metav1.StatusCause{ + { + Type: metav1.CauseTypeFieldValueInvalid, +- Message: fmt.Sprintf("hotplugged Disk %s does not use a scsi bus", k), ++ Message: fmt.Sprintf("hotplugged Disk %s does not use a %q bus", k, v1.DiskBusSCSI), + }, + }) + +diff --git a/pkg/virt-api/webhooks/validating-webhook/admitters/vms-admitter.go b/pkg/virt-api/webhooks/validating-webhook/admitters/vms-admitter.go +index f7e4f92727..edaf475e05 100644 +--- a/pkg/virt-api/webhooks/validating-webhook/admitters/vms-admitter.go ++++ b/pkg/virt-api/webhooks/validating-webhook/admitters/vms-admitter.go +@@ -591,6 +591,8 @@ func (admitter *VMsAdmitter) validateVolumeRequests(vm *v1.VirtualMachine) ([]me + newVolume.VolumeSource.PersistentVolumeClaim = volumeRequest.AddVolumeOptions.VolumeSource.PersistentVolumeClaim + } else if volumeRequest.AddVolumeOptions.VolumeSource.DataVolume != nil { + newVolume.VolumeSource.DataVolume = volumeRequest.AddVolumeOptions.VolumeSource.DataVolume ++ } else if volumeRequest.AddVolumeOptions.VolumeSource.ContainerDisk != nil { ++ newVolume.VolumeSource.ContainerDisk = volumeRequest.AddVolumeOptions.VolumeSource.ContainerDisk + } + + vmVolume, ok := vmVolumeMap[name] +@@ -666,7 +668,6 @@ func (admitter *VMsAdmitter) validateVolumeRequests(vm *v1.VirtualMachine) ([]me + } + + func validateDiskConfiguration(disk *v1.Disk, name string) []metav1.StatusCause { +- var bus v1.DiskBus + // Validate the disk is configured properly + if disk == nil { + return []metav1.StatusCause{{ +@@ -675,19 +676,23 @@ func validateDiskConfiguration(disk *v1.Disk, name string) []metav1.StatusCause 
+ Field: k8sfield.NewPath("Status", "volumeRequests").String(), + }} + } +- if disk.DiskDevice.Disk == nil && disk.DiskDevice.LUN == nil { ++ var bus v1.DiskBus ++ switch { ++ case disk.DiskDevice.Disk != nil: ++ bus = disk.DiskDevice.Disk.Bus ++ case disk.DiskDevice.LUN != nil: ++ bus = disk.DiskDevice.LUN.Bus ++ case disk.DiskDevice.CDRom != nil: ++ bus = disk.DiskDevice.CDRom.Bus ++ default: + return []metav1.StatusCause{{ + Type: metav1.CauseTypeFieldValueInvalid, +- Message: fmt.Sprintf("AddVolume request for [%s] requires diskDevice of type 'disk' or 'lun' to be used.", name), ++ Message: fmt.Sprintf("AddVolume request for [%s] requires diskDevice of type 'disk',lun' or 'cdrom' to be used.", name), + Field: k8sfield.NewPath("Status", "volumeRequests").String(), + }} + } +- if disk.DiskDevice.Disk != nil { +- bus = disk.DiskDevice.Disk.Bus +- } else { +- bus = disk.DiskDevice.LUN.Bus +- } +- if bus != "scsi" { ++ ++ if bus != v1.DiskBusSCSI { + return []metav1.StatusCause{{ + Type: metav1.CauseTypeFieldValueInvalid, + Message: fmt.Sprintf("AddVolume request for [%s] requires disk bus to be 'scsi'. [%s] is not permitted", name, bus), +diff --git a/pkg/virt-controller/services/rendervolumes.go b/pkg/virt-controller/services/rendervolumes.go +index 0181fc05e3..de90ed3cbc 100644 +--- a/pkg/virt-controller/services/rendervolumes.go ++++ b/pkg/virt-controller/services/rendervolumes.go +@@ -296,7 +296,9 @@ func hotplugVolumes(vmiVolumeStatus []v1.VolumeStatus, vmiSpecVolumes []v1.Volum + } + // This detects hotplug volumes for a started but not ready VMI + for _, volume := range vmiSpecVolumes { +- if (volume.DataVolume != nil && volume.DataVolume.Hotpluggable) || (volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.Hotpluggable) { ++ if (volume.DataVolume != nil && volume.DataVolume.Hotpluggable) || ++ (volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.Hotpluggable) || ++ (volume.ContainerDisk != nil && volume.ContainerDisk.Hotpluggable) { + hotplugVolumeSet[volume.Name] = struct{}{} + } + } +diff --git a/pkg/virt-controller/services/template.go b/pkg/virt-controller/services/template.go +index 76ed7307ec..f607c24786 100644 +--- a/pkg/virt-controller/services/template.go ++++ b/pkg/virt-controller/services/template.go +@@ -64,13 +64,15 @@ import ( + ) + + const ( +- containerDisks = "container-disks" +- hotplugDisks = "hotplug-disks" +- hookSidecarSocks = "hook-sidecar-sockets" +- varRun = "/var/run" +- virtBinDir = "virt-bin-share-dir" +- hotplugDisk = "hotplug-disk" +- virtExporter = "virt-exporter" ++ containerDisks = "container-disks" ++ hotplugDisks = "hotplug-disks" ++ hookSidecarSocks = "hook-sidecar-sockets" ++ varRun = "/var/run" ++ virtBinDir = "virt-bin-share-dir" ++ hotplugDisk = "hotplug-disk" ++ virtExporter = "virt-exporter" ++ hotplugContainerDisks = "hotplug-container-disks" ++ HotplugContainerDisk = "hotplug-container-disk-" + ) + + const KvmDevice = "devices.virtualization.deckhouse.io/kvm" +@@ -846,6 +848,49 @@ func sidecarContainerName(i int) string { + return fmt.Sprintf("hook-sidecar-%d", i) + } + ++func sidecarContainerHotplugContainerdDiskName(name string) string { ++ return fmt.Sprintf("%s%s", HotplugContainerDisk, name) ++} ++ ++func (t *templateService) containerForHotplugContainerDisk(name string, cd *v1.ContainerDiskSource, vmi *v1.VirtualMachineInstance) k8sv1.Container { ++ runUser := int64(util.NonRootUID) ++ sharedMount := k8sv1.MountPropagationHostToContainer ++ path := fmt.Sprintf("/path/%s", name) ++ command := 
[]string{"/init/usr/bin/container-disk"} ++ args := []string{"--copy-path", path} ++ ++ return k8sv1.Container{ ++ Name: name, ++ Image: cd.Image, ++ Command: command, ++ Args: args, ++ Resources: hotplugContainerResourceRequirementsForVMI(vmi, t.clusterConfig), ++ SecurityContext: &k8sv1.SecurityContext{ ++ AllowPrivilegeEscalation: pointer.Bool(false), ++ RunAsNonRoot: pointer.Bool(true), ++ RunAsUser: &runUser, ++ SeccompProfile: &k8sv1.SeccompProfile{ ++ Type: k8sv1.SeccompProfileTypeRuntimeDefault, ++ }, ++ Capabilities: &k8sv1.Capabilities{ ++ Drop: []k8sv1.Capability{"ALL"}, ++ }, ++ SELinuxOptions: &k8sv1.SELinuxOptions{ ++ Type: t.clusterConfig.GetSELinuxLauncherType(), ++ Level: "s0", ++ }, ++ }, ++ VolumeMounts: []k8sv1.VolumeMount{ ++ initContainerVolumeMount(), ++ { ++ Name: hotplugContainerDisks, ++ MountPath: "/path", ++ MountPropagation: &sharedMount, ++ }, ++ }, ++ } ++} ++ + func (t *templateService) RenderHotplugAttachmentPodTemplate(volumes []*v1.Volume, ownerPod *k8sv1.Pod, vmi *v1.VirtualMachineInstance, claimMap map[string]*k8sv1.PersistentVolumeClaim) (*k8sv1.Pod, error) { + zero := int64(0) + runUser := int64(util.NonRootUID) +@@ -924,6 +969,30 @@ func (t *templateService) RenderHotplugAttachmentPodTemplate(volumes []*v1.Volum + TerminationGracePeriodSeconds: &zero, + }, + } ++ first := true ++ for _, vol := range vmi.Spec.Volumes { ++ if vol.ContainerDisk == nil || !vol.ContainerDisk.Hotpluggable { ++ continue ++ } ++ name := sidecarContainerHotplugContainerdDiskName(vol.Name) ++ pod.Spec.Containers = append(pod.Spec.Containers, t.containerForHotplugContainerDisk(name, vol.ContainerDisk, vmi)) ++ if first { ++ first = false ++ userId := int64(util.NonRootUID) ++ initContainerCommand := []string{"/usr/bin/cp", ++ "/usr/bin/container-disk", ++ "/init/usr/bin/container-disk", ++ } ++ pod.Spec.InitContainers = append( ++ pod.Spec.InitContainers, ++ t.newInitContainerRenderer(vmi, ++ initContainerVolumeMount(), ++ initContainerResourceRequirementsForVMI(vmi, v1.ContainerDisk, t.clusterConfig), ++ userId).Render(initContainerCommand)) ++ pod.Spec.Volumes = append(pod.Spec.Volumes, emptyDirVolume(hotplugContainerDisks)) ++ pod.Spec.Volumes = append(pod.Spec.Volumes, emptyDirVolume(virtBinDir)) ++ } ++ } + + err := matchSELinuxLevelOfVMI(pod, vmi) + if err != nil { +diff --git a/pkg/virt-controller/watch/BUILD.bazel b/pkg/virt-controller/watch/BUILD.bazel +index 4fd325ba86..82fcaee0a3 100644 +--- a/pkg/virt-controller/watch/BUILD.bazel ++++ b/pkg/virt-controller/watch/BUILD.bazel +@@ -101,6 +101,7 @@ go_library( + "//vendor/k8s.io/client-go/util/flowcontrol:go_default_library", + "//vendor/k8s.io/client-go/util/workqueue:go_default_library", + "//vendor/k8s.io/utils/pointer:go_default_library", ++ "//vendor/k8s.io/utils/ptr:go_default_library", + "//vendor/k8s.io/utils/trace:go_default_library", + "//vendor/kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1:go_default_library", + ], +diff --git a/pkg/virt-controller/watch/vmi.go b/pkg/virt-controller/watch/vmi.go +index fa4e86ee17..c40f1fad89 100644 +--- a/pkg/virt-controller/watch/vmi.go ++++ b/pkg/virt-controller/watch/vmi.go +@@ -32,6 +32,7 @@ import ( + + "k8s.io/utils/ptr" + ++ container_disk "kubevirt.io/kubevirt/pkg/container-disk" + "kubevirt.io/kubevirt/pkg/virt-controller/network" + + "kubevirt.io/kubevirt/pkg/virt-controller/watch/topology" +@@ -1774,12 +1775,18 @@ func (c *VMIController) needsHandleHotplug(hotplugVolumes []*virtv1.Volume, hotp + } + + func (c *VMIController) 
getActiveAndOldAttachmentPods(readyHotplugVolumes []*virtv1.Volume, hotplugAttachmentPods []*k8sv1.Pod) (*k8sv1.Pod, []*k8sv1.Pod) { ++ sort.Slice(hotplugAttachmentPods, func(i, j int) bool { ++ return hotplugAttachmentPods[i].CreationTimestamp.Time.Before(hotplugAttachmentPods[j].CreationTimestamp.Time) ++ }) + var currentPod *k8sv1.Pod + oldPods := make([]*k8sv1.Pod, 0) + for _, attachmentPod := range hotplugAttachmentPods { + if !c.podVolumesMatchesReadyVolumes(attachmentPod, readyHotplugVolumes) { + oldPods = append(oldPods, attachmentPod) + } else { ++ if currentPod != nil { ++ oldPods = append(oldPods, currentPod) ++ } + currentPod = attachmentPod + } + } +@@ -1836,6 +1843,10 @@ func (c *VMIController) handleHotplugVolumes(hotplugVolumes []*virtv1.Volume, ho + readyHotplugVolumes := make([]*virtv1.Volume, 0) + // Find all ready volumes + for _, volume := range hotplugVolumes { ++ if container_disk.IsHotplugContainerDisk(volume) { ++ readyHotplugVolumes = append(readyHotplugVolumes, volume) ++ continue ++ } + var err error + ready, wffc, err := storagetypes.VolumeReadyToAttachToNode(vmi.Namespace, *volume, dataVolumes, c.dataVolumeIndexer, c.pvcIndexer) + if err != nil { +@@ -1883,20 +1894,45 @@ func (c *VMIController) handleHotplugVolumes(hotplugVolumes []*virtv1.Volume, ho + } + + func (c *VMIController) podVolumesMatchesReadyVolumes(attachmentPod *k8sv1.Pod, volumes []*virtv1.Volume) bool { +- // -2 for empty dir and token +- if len(attachmentPod.Spec.Volumes)-2 != len(volumes) { ++ const ( ++ // -2 for empty dir and token ++ subVols = 2 ++ // -4 for hotplug with ContainerDisk. 3 empty dir + token ++ subVolsWithContainerDisk = 4 ++ ) ++ containerDisksNames := make(map[string]struct{}) ++ for _, ctr := range attachmentPod.Spec.Containers { ++ if strings.HasPrefix(ctr.Name, services.HotplugContainerDisk) { ++ containerDisksNames[strings.TrimPrefix(ctr.Name, services.HotplugContainerDisk)] = struct{}{} ++ } ++ } ++ ++ var sub = subVols ++ if len(containerDisksNames) > 0 { ++ sub = subVolsWithContainerDisk ++ } ++ ++ countAttachmentVolumes := len(attachmentPod.Spec.Volumes) - sub + len(containerDisksNames) ++ ++ if countAttachmentVolumes != len(volumes) { + return false + } +- podVolumeMap := make(map[string]k8sv1.Volume) ++ ++ podVolumeMap := make(map[string]struct{}) + for _, volume := range attachmentPod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { +- podVolumeMap[volume.Name] = volume ++ podVolumeMap[volume.Name] = struct{}{} + } + } ++ + for _, volume := range volumes { ++ if container_disk.IsHotplugContainerDisk(volume) { ++ delete(containerDisksNames, volume.Name) ++ continue ++ } + delete(podVolumeMap, volume.Name) + } +- return len(podVolumeMap) == 0 ++ return len(podVolumeMap) == 0 && len(containerDisksNames) == 0 + } + + func (c *VMIController) createAttachmentPod(vmi *virtv1.VirtualMachineInstance, virtLauncherPod *k8sv1.Pod, volumes []*virtv1.Volume) (*k8sv1.Pod, syncError) { +@@ -2007,7 +2043,17 @@ func (c *VMIController) createAttachmentPodTemplate(vmi *virtv1.VirtualMachineIn + var pod *k8sv1.Pod + var err error + +- volumeNamesPVCMap, err := storagetypes.VirtVolumesToPVCMap(volumes, c.pvcIndexer, virtlauncherPod.Namespace) ++ var hasContainerDisk bool ++ var newVolumes []*virtv1.Volume ++ for _, volume := range volumes { ++ if volume.VolumeSource.ContainerDisk != nil { ++ hasContainerDisk = true ++ continue ++ } ++ newVolumes = append(newVolumes, volume) ++ } ++ ++ volumeNamesPVCMap, err := storagetypes.VirtVolumesToPVCMap(newVolumes, c.pvcIndexer, 
virtlauncherPod.Namespace) + if err != nil { + return nil, fmt.Errorf("failed to get PVC map: %v", err) + } +@@ -2029,7 +2075,7 @@ func (c *VMIController) createAttachmentPodTemplate(vmi *virtv1.VirtualMachineIn + } + } + +- if len(volumeNamesPVCMap) > 0 { ++ if len(volumeNamesPVCMap) > 0 || hasContainerDisk { + pod, err = c.templateService.RenderHotplugAttachmentPodTemplate(volumes, virtlauncherPod, vmi, volumeNamesPVCMap) + } + return pod, err +@@ -2151,23 +2197,68 @@ func (c *VMIController) updateVolumeStatus(vmi *virtv1.VirtualMachineInstance, v + ClaimName: volume.Name, + } + } ++ if volume.ContainerDisk != nil && status.ContainerDiskVolume == nil { ++ status.ContainerDiskVolume = &virtv1.ContainerDiskInfo{} ++ } + if attachmentPod == nil { +- if !c.volumeReady(status.Phase) { ++ if volume.ContainerDisk != nil { + status.HotplugVolume.AttachPodUID = "" +- // Volume is not hotplugged in VM and Pod is gone, or hasn't been created yet, check for the PVC associated with the volume to set phase and message +- phase, reason, message := c.getVolumePhaseMessageReason(&vmi.Spec.Volumes[i], vmi.Namespace) +- status.Phase = phase +- status.Message = message +- status.Reason = reason ++ status.Phase = virtv1.VolumePending ++ status.Message = "Attachment pod not found" ++ status.Reason = "AttachmentPodNotFound" ++ } else { ++ if !c.volumeReady(status.Phase) { ++ status.HotplugVolume.AttachPodUID = "" ++ // Volume is not hotplugged in VM and Pod is gone, or hasn't been created yet, check for the PVC associated with the volume to set phase and message ++ phase, reason, message := c.getVolumePhaseMessageReason(&vmi.Spec.Volumes[i], vmi.Namespace) ++ status.Phase = phase ++ status.Message = message ++ status.Reason = reason ++ } + } + } else { + status.HotplugVolume.AttachPodName = attachmentPod.Name +- if len(attachmentPod.Status.ContainerStatuses) == 1 && attachmentPod.Status.ContainerStatuses[0].Ready { ++ if volume.ContainerDisk != nil { ++ var ( ++ uid types.UID ++ isReady bool ++ ) ++ for _, cs := range attachmentPod.Status.ContainerStatuses { ++ name := strings.TrimPrefix(cs.Name, "hotplug-container-disk-") ++ if volume.Name == name { ++ uid = attachmentPod.UID ++ isReady = cs.Ready ++ break ++ } ++ } ++ if isReady { ++ status.HotplugVolume.AttachPodUID = uid ++ } else { ++ status.HotplugVolume.AttachPodUID = "" ++ ++ } ++ } else if len(attachmentPod.Status.ContainerStatuses) == 1 && attachmentPod.Status.ContainerStatuses[0].Ready { + status.HotplugVolume.AttachPodUID = attachmentPod.UID ++ } else if volume.PersistentVolumeClaim != nil { ++ var isReady bool ++ for _, cs := range attachmentPod.Status.ContainerStatuses { ++ if cs.Name == "hotplug-disk" { ++ isReady = cs.Ready ++ break ++ } ++ } ++ ++ if isReady { ++ status.HotplugVolume.AttachPodUID = attachmentPod.UID ++ } else { ++ // Remove UID of old pod if a new one is available, but not yet ready ++ status.HotplugVolume.AttachPodUID = "" ++ } + } else { + // Remove UID of old pod if a new one is available, but not yet ready + status.HotplugVolume.AttachPodUID = "" + } ++ + if c.canMoveToAttachedPhase(status.Phase) { + status.Phase = virtv1.HotplugVolumeAttachedToNode + status.Message = fmt.Sprintf("Created hotplug attachment pod %s, for volume %s", attachmentPod.Name, volume.Name) +@@ -2176,7 +2267,6 @@ func (c *VMIController) updateVolumeStatus(vmi *virtv1.VirtualMachineInstance, v + } + } + } +- + if volume.VolumeSource.PersistentVolumeClaim != nil || volume.VolumeSource.DataVolume != nil || volume.VolumeSource.MemoryDump != nil { + + 
pvcName := storagetypes.PVCNameFromVirtVolume(&volume) +diff --git a/pkg/virt-handler/container-disk/hotplug.go b/pkg/virt-handler/container-disk/hotplug.go +new file mode 100644 +index 0000000000..553f76cb0a +--- /dev/null ++++ b/pkg/virt-handler/container-disk/hotplug.go +@@ -0,0 +1,538 @@ ++package container_disk ++ ++import ( ++ "encoding/json" ++ "errors" ++ "fmt" ++ "os" ++ "path/filepath" ++ "strings" ++ "sync" ++ "time" ++ ++ hotplugdisk "kubevirt.io/kubevirt/pkg/hotplug-disk" ++ "kubevirt.io/kubevirt/pkg/unsafepath" ++ ++ "kubevirt.io/kubevirt/pkg/safepath" ++ virtconfig "kubevirt.io/kubevirt/pkg/virt-config" ++ virt_chroot "kubevirt.io/kubevirt/pkg/virt-handler/virt-chroot" ++ ++ "kubevirt.io/client-go/log" ++ ++ containerdisk "kubevirt.io/kubevirt/pkg/container-disk" ++ diskutils "kubevirt.io/kubevirt/pkg/ephemeral-disk-utils" ++ "kubevirt.io/kubevirt/pkg/virt-handler/isolation" ++ ++ "k8s.io/apimachinery/pkg/api/equality" ++ "k8s.io/apimachinery/pkg/types" ++ ++ v1 "kubevirt.io/api/core/v1" ++) ++ ++type HotplugMounter interface { ++ ContainerDisksReady(vmi *v1.VirtualMachineInstance, notInitializedSince time.Time, sourceUID types.UID) (bool, error) ++ MountAndVerify(vmi *v1.VirtualMachineInstance) (map[string]*containerdisk.DiskInfo, error) ++ MoundAndVerifyFromPod(vmi *v1.VirtualMachineInstance, sourceUID types.UID) (map[string]*containerdisk.DiskInfo, error) ++ IsMounted(vmi *v1.VirtualMachineInstance, volumeName string) (bool, error) ++ Umount(vmi *v1.VirtualMachineInstance) error ++ UmountAll(vmi *v1.VirtualMachineInstance) error ++ ComputeChecksums(vmi *v1.VirtualMachineInstance, sourceUID types.UID) (*DiskChecksums, error) ++} ++ ++type hotplugMounter struct { ++ podIsolationDetector isolation.PodIsolationDetector ++ mountStateDir string ++ mountRecords map[types.UID]*vmiMountTargetRecord ++ mountRecordsLock sync.Mutex ++ suppressWarningTimeout time.Duration ++ clusterConfig *virtconfig.ClusterConfig ++ nodeIsolationResult isolation.IsolationResult ++ ++ hotplugPathGetter containerdisk.HotplugSocketPathGetter ++ hotplugManager hotplugdisk.HotplugDiskManagerInterface ++} ++ ++func (m *hotplugMounter) IsMounted(vmi *v1.VirtualMachineInstance, volumeName string) (bool, error) { ++ virtLauncherUID := m.findVirtlauncherUID(vmi) ++ if virtLauncherUID == "" { ++ return false, nil ++ } ++ target, err := m.hotplugManager.GetFileSystemDiskTargetPathFromHostView(virtLauncherUID, volumeName, false) ++ if err != nil { ++ return false, err ++ } ++ return isolation.IsMounted(target) ++} ++ ++func NewHotplugMounter(isoDetector isolation.PodIsolationDetector, ++ mountStateDir string, ++ clusterConfig *virtconfig.ClusterConfig, ++ hotplugManager hotplugdisk.HotplugDiskManagerInterface, ++) HotplugMounter { ++ return &hotplugMounter{ ++ mountRecords: make(map[types.UID]*vmiMountTargetRecord), ++ podIsolationDetector: isoDetector, ++ mountStateDir: mountStateDir, ++ suppressWarningTimeout: 1 * time.Minute, ++ clusterConfig: clusterConfig, ++ nodeIsolationResult: isolation.NodeIsolationResult(), ++ ++ hotplugPathGetter: containerdisk.NewHotplugSocketPathGetter(""), ++ hotplugManager: hotplugManager, ++ } ++} ++ ++func (m *hotplugMounter) deleteMountTargetRecord(vmi *v1.VirtualMachineInstance) error { ++ if string(vmi.UID) == "" { ++ return fmt.Errorf("unable to find container disk mounted directories for vmi without uid") ++ } ++ ++ recordFile := filepath.Join(m.mountStateDir, string(vmi.UID)) ++ ++ exists, err := diskutils.FileExists(recordFile) ++ if err != nil { ++ return err ++ } ++ ++ 
if exists { ++ record, err := m.getMountTargetRecord(vmi) ++ if err != nil { ++ return err ++ } ++ ++ for _, target := range record.MountTargetEntries { ++ os.Remove(target.TargetFile) ++ os.Remove(target.SocketFile) ++ } ++ ++ os.Remove(recordFile) ++ } ++ ++ m.mountRecordsLock.Lock() ++ defer m.mountRecordsLock.Unlock() ++ delete(m.mountRecords, vmi.UID) ++ ++ return nil ++} ++ ++func (m *hotplugMounter) getMountTargetRecord(vmi *v1.VirtualMachineInstance) (*vmiMountTargetRecord, error) { ++ var ok bool ++ var existingRecord *vmiMountTargetRecord ++ ++ if string(vmi.UID) == "" { ++ return nil, fmt.Errorf("unable to find container disk mounted directories for vmi without uid") ++ } ++ ++ m.mountRecordsLock.Lock() ++ defer m.mountRecordsLock.Unlock() ++ existingRecord, ok = m.mountRecords[vmi.UID] ++ ++ // first check memory cache ++ if ok { ++ return existingRecord, nil ++ } ++ ++ // if not there, see if record is on disk, this can happen if virt-handler restarts ++ recordFile := filepath.Join(m.mountStateDir, filepath.Clean(string(vmi.UID))) ++ ++ exists, err := diskutils.FileExists(recordFile) ++ if err != nil { ++ return nil, err ++ } ++ ++ if exists { ++ record := vmiMountTargetRecord{} ++ // #nosec No risk for path injection. Using static base and cleaned filename ++ bytes, err := os.ReadFile(recordFile) ++ if err != nil { ++ return nil, err ++ } ++ err = json.Unmarshal(bytes, &record) ++ if err != nil { ++ return nil, err ++ } ++ ++ if !record.UsesSafePaths { ++ record.UsesSafePaths = true ++ for i, entry := range record.MountTargetEntries { ++ safePath, err := safepath.JoinAndResolveWithRelativeRoot("/", entry.TargetFile) ++ if err != nil { ++ return nil, fmt.Errorf("failed converting legacy path to safepath: %v", err) ++ } ++ record.MountTargetEntries[i].TargetFile = unsafepath.UnsafeAbsolute(safePath.Raw()) ++ } ++ } ++ ++ m.mountRecords[vmi.UID] = &record ++ return &record, nil ++ } ++ ++ // not found ++ return nil, nil ++} ++ ++func (m *hotplugMounter) addMountTargetRecord(vmi *v1.VirtualMachineInstance, record *vmiMountTargetRecord) error { ++ return m.setAddMountTargetRecordHelper(vmi, record, true) ++} ++ ++func (m *hotplugMounter) setMountTargetRecord(vmi *v1.VirtualMachineInstance, record *vmiMountTargetRecord) error { ++ return m.setAddMountTargetRecordHelper(vmi, record, false) ++} ++ ++func (m *hotplugMounter) setAddMountTargetRecordHelper(vmi *v1.VirtualMachineInstance, record *vmiMountTargetRecord, addPreviousRules bool) error { ++ if string(vmi.UID) == "" { ++ return fmt.Errorf("unable to set container disk mounted directories for vmi without uid") ++ } ++ ++ record.UsesSafePaths = true ++ ++ recordFile := filepath.Join(m.mountStateDir, string(vmi.UID)) ++ fileExists, err := diskutils.FileExists(recordFile) ++ if err != nil { ++ return err ++ } ++ ++ m.mountRecordsLock.Lock() ++ defer m.mountRecordsLock.Unlock() ++ ++ existingRecord, ok := m.mountRecords[vmi.UID] ++ if ok && fileExists && equality.Semantic.DeepEqual(existingRecord, record) { ++ // already done ++ return nil ++ } ++ ++ if addPreviousRules && existingRecord != nil && len(existingRecord.MountTargetEntries) > 0 { ++ record.MountTargetEntries = append(record.MountTargetEntries, existingRecord.MountTargetEntries...) 
++ } ++ ++ bytes, err := json.Marshal(record) ++ if err != nil { ++ return err ++ } ++ ++ err = os.MkdirAll(filepath.Dir(recordFile), 0750) ++ if err != nil { ++ return err ++ } ++ ++ err = os.WriteFile(recordFile, bytes, 0600) ++ if err != nil { ++ return err ++ } ++ ++ m.mountRecords[vmi.UID] = record ++ ++ return nil ++} ++ ++func (m *hotplugMounter) MoundAndVerifyFromPod(vmi *v1.VirtualMachineInstance, sourceUID types.UID) (map[string]*containerdisk.DiskInfo, error) { ++ return m.mountAndVerify(vmi, sourceUID) ++} ++ ++func (m *hotplugMounter) MountAndVerify(vmi *v1.VirtualMachineInstance) (map[string]*containerdisk.DiskInfo, error) { ++ return m.mountAndVerify(vmi, "") ++} ++ ++func (m *hotplugMounter) mountAndVerify(vmi *v1.VirtualMachineInstance, sourceUID types.UID) (map[string]*containerdisk.DiskInfo, error) { ++ virtLauncherUID := m.findVirtlauncherUID(vmi) ++ if virtLauncherUID == "" { ++ return nil, nil ++ } ++ ++ record := vmiMountTargetRecord{} ++ disksInfo := map[string]*containerdisk.DiskInfo{} ++ ++ for _, volume := range vmi.Spec.Volumes { ++ if volume.ContainerDisk != nil && volume.ContainerDisk.Hotpluggable { ++ target, err := m.hotplugManager.GetFileSystemDiskTargetPathFromHostView(virtLauncherUID, volume.Name, true) ++ if err != nil { ++ return nil, err ++ } ++ ++ sock, err := m.hotplugPathGetter(vmi, volume.Name, sourceUID) ++ if err != nil { ++ return nil, err ++ } ++ ++ record.MountTargetEntries = append(record.MountTargetEntries, vmiMountTargetEntry{ ++ TargetFile: unsafepath.UnsafeAbsolute(target.Raw()), ++ SocketFile: sock, ++ }) ++ } ++ } ++ ++ if len(record.MountTargetEntries) > 0 { ++ err := m.setMountTargetRecord(vmi, &record) ++ if err != nil { ++ return nil, err ++ } ++ } ++ ++ vmiRes, err := m.podIsolationDetector.Detect(vmi) ++ if err != nil { ++ return nil, fmt.Errorf("failed to detect VMI pod: %v", err) ++ } ++ ++ for _, volume := range vmi.Spec.Volumes { ++ if volume.ContainerDisk != nil && volume.ContainerDisk.Hotpluggable { ++ target, err := m.hotplugManager.GetFileSystemDiskTargetPathFromHostView(virtLauncherUID, volume.Name, false) ++ ++ if isMounted, err := isolation.IsMounted(target); err != nil { ++ return nil, fmt.Errorf("failed to determine if %s is already mounted: %v", target, err) ++ } else if !isMounted { ++ ++ sourceFile, err := m.getContainerDiskPath(vmi, &volume, volume.Name, sourceUID) ++ if err != nil { ++ return nil, fmt.Errorf("failed to find a sourceFile in containerDisk %v: %v", volume.Name, err) ++ } ++ ++ log.DefaultLogger().Object(vmi).Infof("Bind mounting container disk at %s to %s", sourceFile, target) ++ opts := []string{ ++ "bind", "ro", "uid=107", "gid=107", ++ } ++ err = virt_chroot.MountChrootWithOptions(sourceFile, target, opts...) ++ if err != nil { ++ return nil, fmt.Errorf("failed to bindmount containerDisk %v. 
err: %w", volume.Name, err) ++ } ++ } ++ ++ imageInfo, err := isolation.GetImageInfo( ++ containerdisk.GetHotplugContainerDiskTargetPathFromLauncherView(volume.Name), ++ vmiRes, ++ m.clusterConfig.GetDiskVerification(), ++ ) ++ if err != nil { ++ return nil, fmt.Errorf("failed to get image info: %v", err) ++ } ++ if err := containerdisk.VerifyImage(imageInfo); err != nil { ++ return nil, fmt.Errorf("invalid image in containerDisk %v: %v", volume.Name, err) ++ } ++ disksInfo[volume.Name] = imageInfo ++ } ++ } ++ ++ return disksInfo, nil ++} ++ ++func (m *hotplugMounter) Umount(vmi *v1.VirtualMachineInstance) error { ++ record, err := m.getMountTargetRecord(vmi) ++ if err != nil { ++ return err ++ } else if record == nil { ++ // no entries to unmount ++ ++ log.DefaultLogger().Object(vmi).Infof("No container disk mount entries found to unmount") ++ return nil ++ } ++ ++ entriesForDelete := make(map[vmiMountTargetEntry]struct{}) ++ ++ for _, r := range record.MountTargetEntries { ++ name, err := extractNameFromSocket(r.SocketFile) ++ if err != nil { ++ return err ++ } ++ needUmount := true ++ for _, v := range vmi.Status.VolumeStatus { ++ if v.Name == name { ++ needUmount = false ++ } ++ } ++ if needUmount { ++ file, err := safepath.NewFileNoFollow(r.TargetFile) ++ if err != nil { ++ if errors.Is(err, os.ErrNotExist) { ++ entriesForDelete[r] = struct{}{} ++ continue ++ } ++ return fmt.Errorf(failedCheckMountPointFmt, r.TargetFile, err) ++ } ++ _ = file.Close() ++ mounted, err := m.IsMounted(vmi, name) ++ if err != nil { ++ return fmt.Errorf(failedCheckMountPointFmt, r.TargetFile, err) ++ } ++ if !mounted { ++ entriesForDelete[r] = struct{}{} ++ continue ++ } ++ // #nosec No risk for attacket injection. Parameters are predefined strings ++ out, err := virt_chroot.UmountChroot(file.Path()).CombinedOutput() ++ if err != nil { ++ return fmt.Errorf(failedUnmountFmt, file, string(out), err) ++ } ++ entriesForDelete[r] = struct{}{} ++ } ++ } ++ newEntries := make([]vmiMountTargetEntry, 0, len(record.MountTargetEntries)-len(entriesForDelete)) ++ for _, entry := range record.MountTargetEntries { ++ if _, found := entriesForDelete[entry]; found { ++ continue ++ } ++ newEntries = append(newEntries, entry) ++ } ++ record.MountTargetEntries = newEntries ++ return m.setMountTargetRecord(vmi, record) ++} ++ ++func extractNameFromSocket(socketFile string) (string, error) { ++ base := filepath.Base(socketFile) ++ if strings.HasPrefix(base, "hotplug-container-disk-") && strings.HasSuffix(base, ".sock") { ++ name := strings.TrimPrefix(base, "hotplug-container-disk-") ++ name = strings.TrimSuffix(name, ".sock") ++ return name, nil ++ } ++ return "", fmt.Errorf("name not found in path") ++} ++ ++func (m *hotplugMounter) UmountAll(vmi *v1.VirtualMachineInstance) error { ++ if vmi.UID == "" { ++ return nil ++ } ++ ++ record, err := m.getMountTargetRecord(vmi) ++ if err != nil { ++ return err ++ } else if record == nil { ++ // no entries to unmount ++ ++ log.DefaultLogger().Object(vmi).Infof("No container disk mount entries found to unmount") ++ return nil ++ } ++ ++ log.DefaultLogger().Object(vmi).Infof("Found container disk mount entries") ++ for _, entry := range record.MountTargetEntries { ++ log.DefaultLogger().Object(vmi).Infof("Looking to see if containerdisk is mounted at path %s", entry.TargetFile) ++ file, err := safepath.NewFileNoFollow(entry.TargetFile) ++ if err != nil { ++ if errors.Is(err, os.ErrNotExist) { ++ continue ++ } ++ return fmt.Errorf(failedCheckMountPointFmt, entry.TargetFile, err) ++ } ++ _ 
= file.Close() ++ if mounted, err := isolation.IsMounted(file.Path()); err != nil { ++ return fmt.Errorf(failedCheckMountPointFmt, file, err) ++ } else if mounted { ++ log.DefaultLogger().Object(vmi).Infof("unmounting container disk at path %s", file) ++ // #nosec No risk for attacket injection. Parameters are predefined strings ++ out, err := virt_chroot.UmountChroot(file.Path()).CombinedOutput() ++ if err != nil { ++ return fmt.Errorf(failedUnmountFmt, file, string(out), err) ++ } ++ } ++ } ++ err = m.deleteMountTargetRecord(vmi) ++ if err != nil { ++ return err ++ } ++ ++ return nil ++} ++ ++func (m *hotplugMounter) ContainerDisksReady(vmi *v1.VirtualMachineInstance, notInitializedSince time.Time, sourceUID types.UID) (bool, error) { ++ for _, volume := range vmi.Spec.Volumes { ++ if containerdisk.IsHotplugContainerDisk(&volume) { ++ _, err := m.hotplugPathGetter(vmi, volume.Name, sourceUID) ++ if err != nil { ++ log.DefaultLogger().Object(vmi).Reason(err).Infof("hotplug containerdisk %s not yet ready", volume.Name) ++ if time.Now().After(notInitializedSince.Add(m.suppressWarningTimeout)) { ++ return false, fmt.Errorf("hotplug containerdisk %s still not ready after one minute", volume.Name) ++ } ++ return false, nil ++ } ++ } ++ } ++ ++ log.DefaultLogger().Object(vmi).V(4).Info("all hotplug containerdisks are ready") ++ return true, nil ++} ++ ++func (m *hotplugMounter) getContainerDiskPath(vmi *v1.VirtualMachineInstance, volume *v1.Volume, volumeName string, sourceUID types.UID) (*safepath.Path, error) { ++ sock, err := m.hotplugPathGetter(vmi, volumeName, sourceUID) ++ if err != nil { ++ return nil, ErrDiskContainerGone ++ } ++ ++ res, err := m.podIsolationDetector.DetectForSocket(vmi, sock) ++ if err != nil { ++ return nil, fmt.Errorf("failed to detect socket for containerDisk %v: %v", volume.Name, err) ++ } ++ ++ mountPoint, err := isolation.ParentPathForRootMount(m.nodeIsolationResult, res) ++ if err != nil { ++ return nil, fmt.Errorf("failed to detect root mount point of containerDisk %v on the node: %v", volume.Name, err) ++ } ++ ++ return containerdisk.GetImage(mountPoint, volume.ContainerDisk.Path) ++} ++ ++func (m *hotplugMounter) ComputeChecksums(vmi *v1.VirtualMachineInstance, sourceUID types.UID) (*DiskChecksums, error) { ++ ++ diskChecksums := &DiskChecksums{ ++ ContainerDiskChecksums: map[string]uint32{}, ++ } ++ ++ for _, volume := range vmi.Spec.Volumes { ++ if volume.VolumeSource.ContainerDisk == nil || !volume.VolumeSource.ContainerDisk.Hotpluggable { ++ continue ++ } ++ ++ path, err := m.getContainerDiskPath(vmi, &volume, volume.Name, sourceUID) ++ if err != nil { ++ return nil, err ++ } ++ ++ checksum, err := getDigest(path) ++ if err != nil { ++ return nil, err ++ } ++ ++ diskChecksums.ContainerDiskChecksums[volume.Name] = checksum ++ } ++ ++ return diskChecksums, nil ++} ++ ++func (m *hotplugMounter) findVirtlauncherUID(vmi *v1.VirtualMachineInstance) (uid types.UID) { ++ cnt := 0 ++ for podUID := range vmi.Status.ActivePods { ++ _, err := m.hotplugManager.GetHotplugTargetPodPathOnHost(podUID) ++ if err == nil { ++ uid = podUID ++ cnt++ ++ } ++ } ++ if cnt == 1 { ++ return ++ } ++ // Either no pods, or multiple pods, skip. 
++ return types.UID("") ++} ++ ++func GetMigrationAttachmentPodUID(vmi *v1.VirtualMachineInstance) (types.UID, bool) { ++ if attachmentPodUID := vmi.Status.MigrationState.TargetAttachmentPodUID; attachmentPodUID != types.UID("") { ++ return attachmentPodUID, true ++ } ++ return types.UID(""), false ++} ++ ++func VerifyHotplugChecksums(mounter HotplugMounter, vmi *v1.VirtualMachineInstance, sourceUID types.UID) error { ++ diskChecksums, err := mounter.ComputeChecksums(vmi, sourceUID) ++ if err != nil { ++ return fmt.Errorf("failed to compute hotplug container disk checksums: %s", err) ++ } ++ ++ for _, volumeStatus := range vmi.Status.VolumeStatus { ++ if volumeStatus.ContainerDiskVolume == nil || volumeStatus.HotplugVolume == nil { ++ continue ++ } ++ ++ expectedChecksum := volumeStatus.ContainerDiskVolume.Checksum ++ computedChecksum := diskChecksums.ContainerDiskChecksums[volumeStatus.Name] ++ if err := compareChecksums(expectedChecksum, computedChecksum); err != nil { ++ return fmt.Errorf("checksum error for hotplug volume %s: %w", volumeStatus.Name, err) ++ } ++ } ++ return nil ++} +diff --git a/pkg/virt-handler/container-disk/mount.go b/pkg/virt-handler/container-disk/mount.go +index 953c20f3af..d33a571495 100644 +--- a/pkg/virt-handler/container-disk/mount.go ++++ b/pkg/virt-handler/container-disk/mount.go +@@ -254,7 +254,7 @@ func (m *mounter) MountAndVerify(vmi *v1.VirtualMachineInstance) (map[string]*co + disksInfo := map[string]*containerdisk.DiskInfo{} + + for i, volume := range vmi.Spec.Volumes { +- if volume.ContainerDisk != nil { ++ if volume.ContainerDisk != nil && !volume.ContainerDisk.Hotpluggable { + diskTargetDir, err := containerdisk.GetDiskTargetDirFromHostView(vmi) + if err != nil { + return nil, err +@@ -296,7 +296,7 @@ func (m *mounter) MountAndVerify(vmi *v1.VirtualMachineInstance) (map[string]*co + } + + for i, volume := range vmi.Spec.Volumes { +- if volume.ContainerDisk != nil { ++ if volume.ContainerDisk != nil && !volume.ContainerDisk.Hotpluggable { + diskTargetDir, err := containerdisk.GetDiskTargetDirFromHostView(vmi) + if err != nil { + return nil, err +@@ -394,7 +394,7 @@ func (m *mounter) Unmount(vmi *v1.VirtualMachineInstance) error { + + func (m *mounter) ContainerDisksReady(vmi *v1.VirtualMachineInstance, notInitializedSince time.Time) (bool, error) { + for i, volume := range vmi.Spec.Volumes { +- if volume.ContainerDisk != nil { ++ if volume.ContainerDisk != nil && !volume.ContainerDisk.Hotpluggable { + _, err := m.socketPathGetter(vmi, i) + if err != nil { + log.DefaultLogger().Object(vmi).Reason(err).Infof("containerdisk %s not yet ready", volume.Name) +@@ -706,7 +706,7 @@ func (m *mounter) ComputeChecksums(vmi *v1.VirtualMachineInstance) (*DiskChecksu + + // compute for containerdisks + for i, volume := range vmi.Spec.Volumes { +- if volume.VolumeSource.ContainerDisk == nil { ++ if volume.VolumeSource.ContainerDisk == nil || volume.VolumeSource.ContainerDisk.Hotpluggable { + continue + } + +@@ -771,7 +771,7 @@ func VerifyChecksums(mounter Mounter, vmi *v1.VirtualMachineInstance) error { + + // verify containerdisks + for _, volumeStatus := range vmi.Status.VolumeStatus { +- if volumeStatus.ContainerDiskVolume == nil { ++ if volumeStatus.ContainerDiskVolume == nil || volumeStatus.HotplugVolume != nil { + continue + } + +diff --git a/pkg/virt-handler/hotplug-disk/mount.go b/pkg/virt-handler/hotplug-disk/mount.go +index 971c8d55fc..034c3d8526 100644 +--- a/pkg/virt-handler/hotplug-disk/mount.go ++++ b/pkg/virt-handler/hotplug-disk/mount.go +@@ -343,7 
+343,7 @@ func (m *volumeMounter) mountFromPod(vmi *v1.VirtualMachineInstance, sourceUID t + return err + } + for _, volumeStatus := range vmi.Status.VolumeStatus { +- if volumeStatus.HotplugVolume == nil { ++ if volumeStatus.HotplugVolume == nil || volumeStatus.ContainerDiskVolume != nil { + // Skip non hotplug volumes + continue + } +@@ -649,7 +649,7 @@ func (m *volumeMounter) Unmount(vmi *v1.VirtualMachineInstance, cgroupManager cg + return err + } + for _, volumeStatus := range vmi.Status.VolumeStatus { +- if volumeStatus.HotplugVolume == nil { ++ if volumeStatus.HotplugVolume == nil || volumeStatus.ContainerDiskVolume != nil { + continue + } + var path *safepath.Path +diff --git a/pkg/virt-handler/isolation/detector.go b/pkg/virt-handler/isolation/detector.go +index f83f96ead4..5e38c6cedd 100644 +--- a/pkg/virt-handler/isolation/detector.go ++++ b/pkg/virt-handler/isolation/detector.go +@@ -24,6 +24,8 @@ package isolation + import ( + "fmt" + "net" ++ "os" ++ "path" + "runtime" + "syscall" + "time" +@@ -207,12 +209,45 @@ func setProcessMemoryLockRLimit(pid int, size int64) error { + return nil + } + ++type deferFunc func() ++ ++func (s *socketBasedIsolationDetector) socketHack(socket string) (sock net.Conn, deferFunc deferFunc, err error) { ++ fn := func() {} ++ if len([]rune(socket)) <= 108 { ++ sock, err = net.DialTimeout("unix", socket, time.Duration(isolationDialTimeout)*time.Second) ++ fn = func() { ++ if err == nil { ++ sock.Close() ++ } ++ } ++ return sock, fn, err ++ } ++ base := path.Base(socket) ++ newPath := fmt.Sprintf("/tmp/%s", base) ++ if err = os.Symlink(socket, newPath); err != nil { ++ return nil, fn, err ++ } ++ sock, err = net.DialTimeout("unix", newPath, time.Duration(isolationDialTimeout)*time.Second) ++ fn = func() { ++ if err == nil { ++ sock.Close() ++ } ++ os.Remove(newPath) ++ } ++ return sock, fn, err ++} ++ + func (s *socketBasedIsolationDetector) getPid(socket string) (int, error) { +- sock, err := net.DialTimeout("unix", socket, time.Duration(isolationDialTimeout)*time.Second) ++ sock, defFn, err := s.socketHack(socket) ++ defer defFn() + if err != nil { + return -1, err + } +- defer sock.Close() ++ //sock, err := net.DialTimeout("unix", socket, time.Duration(isolationDialTimeout)*time.Second) ++ //if err != nil { ++ // return -1, err ++ //} ++ //defer sock.Close() + + ufile, err := sock.(*net.UnixConn).File() + if err != nil { +diff --git a/pkg/virt-handler/virt-chroot/virt-chroot.go b/pkg/virt-handler/virt-chroot/virt-chroot.go +index 4160212b7b..580b788acc 100644 +--- a/pkg/virt-handler/virt-chroot/virt-chroot.go ++++ b/pkg/virt-handler/virt-chroot/virt-chroot.go +@@ -20,7 +20,10 @@ + package virt_chroot + + import ( ++ "bytes" ++ "fmt" + "os/exec" ++ "slices" + "strings" + + "kubevirt.io/kubevirt/pkg/safepath" +@@ -48,6 +51,49 @@ func MountChroot(sourcePath, targetPath *safepath.Path, ro bool) *exec.Cmd { + return UnsafeMountChroot(trimProcPrefix(sourcePath), trimProcPrefix(targetPath), ro) + } + ++func MountChrootWithOptions(sourcePath, targetPath *safepath.Path, mountOptions ...string) error { ++ args := append(getBaseArgs(), "mount") ++ remountArgs := slices.Clone(args) ++ ++ mountOptions = slices.DeleteFunc(mountOptions, func(s string) bool { ++ return s == "remount" ++ }) ++ if len(mountOptions) > 0 { ++ opts := strings.Join(mountOptions, ",") ++ remountOpts := "remount," + opts ++ args = append(args, "-o", opts) ++ remountArgs = append(remountArgs, "-o", remountOpts) ++ } ++ ++ sp := trimProcPrefix(sourcePath) ++ tp := 
trimProcPrefix(targetPath) ++ args = append(args, sp, tp) ++ remountArgs = append(remountArgs, sp, tp) ++ ++ stdout := new(bytes.Buffer) ++ stderr := new(bytes.Buffer) ++ ++ cmd := exec.Command(binaryPath, args...) ++ cmd.Stdout = stdout ++ cmd.Stderr = stderr ++ err := cmd.Run() ++ if err != nil { ++ return fmt.Errorf("mount failed: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) ++ } ++ ++ stdout = new(bytes.Buffer) ++ stderr = new(bytes.Buffer) ++ ++ remountCmd := exec.Command(binaryPath, remountArgs...) ++ cmd.Stdout = stdout ++ cmd.Stderr = stderr ++ err = remountCmd.Run() ++ if err != nil { ++ return fmt.Errorf("mount failed: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) ++ } ++ return nil ++} ++ + // Deprecated: UnsafeMountChroot is used to connect to code which needs to be refactored + // to handle mounts securely. + func UnsafeMountChroot(sourcePath, targetPath string, ro bool) *exec.Cmd { +diff --git a/pkg/virt-handler/vm.go b/pkg/virt-handler/vm.go +index 24352cf6e9..301d7b2249 100644 +--- a/pkg/virt-handler/vm.go ++++ b/pkg/virt-handler/vm.go +@@ -25,6 +25,7 @@ import ( + goerror "errors" + "fmt" + "io" ++ "maps" + "net" + "os" + "path/filepath" +@@ -247,6 +248,13 @@ func NewController( + vmiExpectations: controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()), + sriovHotplugExecutorPool: executor.NewRateLimitedExecutorPool(executor.NewExponentialLimitedBackoffCreator()), + ioErrorRetryManager: NewFailRetryManager("io-error-retry", 10*time.Second, 3*time.Minute, 30*time.Second), ++ ++ hotplugContainerDiskMounter: container_disk.NewHotplugMounter( ++ podIsolationDetector, ++ filepath.Join(virtPrivateDir, "hotplug-container-disk-mount-state"), ++ clusterConfig, ++ hotplugdisk.NewHotplugDiskManager(kubeletPodsDir), ++ ), + } + + _, err := vmiSourceInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ +@@ -342,6 +350,8 @@ type VirtualMachineController struct { + hostCpuModel string + vmiExpectations *controller.UIDTrackingControllerExpectations + ioErrorRetryManager *FailRetryManager ++ ++ hotplugContainerDiskMounter container_disk.HotplugMounter + } + + type virtLauncherCriticalSecurebootError struct { +@@ -876,7 +886,15 @@ func (d *VirtualMachineController) updateHotplugVolumeStatus(vmi *v1.VirtualMach + needsRefresh := false + if volumeStatus.Target == "" { + needsRefresh = true +- mounted, err := d.hotplugVolumeMounter.IsMounted(vmi, volumeStatus.Name, volumeStatus.HotplugVolume.AttachPodUID) ++ var ( ++ mounted bool ++ err error ++ ) ++ if volumeStatus.ContainerDiskVolume != nil { ++ mounted, err = d.hotplugContainerDiskMounter.IsMounted(vmi, volumeStatus.Name) ++ } else { ++ mounted, err = d.hotplugVolumeMounter.IsMounted(vmi, volumeStatus.Name, volumeStatus.HotplugVolume.AttachPodUID) ++ } + if err != nil { + log.Log.Object(vmi).Errorf("error occurred while checking if volume is mounted: %v", err) + } +@@ -898,6 +916,7 @@ func (d *VirtualMachineController) updateHotplugVolumeStatus(vmi *v1.VirtualMach + volumeStatus.Reason = VolumeUnMountedFromPodReason + } + } ++ + } else { + // Successfully attached to VM. 
+ volumeStatus.Phase = v1.VolumeReady +@@ -966,17 +985,28 @@ func (d *VirtualMachineController) updateChecksumInfo(vmi *v1.VirtualMachineInst + if err != nil { + return err + } ++ hotplugDiskChecksums, err := d.hotplugContainerDiskMounter.ComputeChecksums(vmi, "") ++ if goerror.Is(err, container_disk.ErrDiskContainerGone) { ++ log.Log.Errorf("cannot compute checksums as hotplug containerdisk containers seem to have been terminated") ++ return nil ++ } ++ if err != nil { ++ return err ++ } + + // containerdisks + for i := range vmi.Status.VolumeStatus { + checksum, exists := diskChecksums.ContainerDiskChecksums[vmi.Status.VolumeStatus[i].Name] +- if !exists { +- // not a containerdisk +- continue ++ if exists { ++ vmi.Status.VolumeStatus[i].ContainerDiskVolume = &v1.ContainerDiskInfo{ ++ Checksum: checksum, ++ } + } +- +- vmi.Status.VolumeStatus[i].ContainerDiskVolume = &v1.ContainerDiskInfo{ +- Checksum: checksum, ++ hotplugChecksum, hotplugExists := hotplugDiskChecksums.ContainerDiskChecksums[vmi.Status.VolumeStatus[i].Name] ++ if hotplugExists { ++ vmi.Status.VolumeStatus[i].ContainerDiskVolume = &v1.ContainerDiskInfo{ ++ Checksum: hotplugChecksum, ++ } + } + } + +@@ -2178,6 +2208,11 @@ func (d *VirtualMachineController) processVmCleanup(vmi *v1.VirtualMachineInstan + return err + } + ++ err := d.hotplugContainerDiskMounter.UmountAll(vmi) ++ if err != nil { ++ return err ++ } ++ + // UnmountAll does the cleanup on the "best effort" basis: it is + // safe to pass a nil cgroupManager. + cgroupManager, _ := getCgroupManager(vmi) +@@ -2808,7 +2843,6 @@ func (d *VirtualMachineController) vmUpdateHelperMigrationTarget(origVMI *v1.Vir + d.Queue.AddAfter(controller.VirtualMachineInstanceKey(vmi), time.Second*1) + return nil + } +- + // Verify container disks checksum + err = container_disk.VerifyChecksums(d.containerDiskMounter, vmi) + switch { +@@ -2823,11 +2857,41 @@ func (d *VirtualMachineController) vmUpdateHelperMigrationTarget(origVMI *v1.Vir + return err + } + ++ if uid, found := container_disk.GetMigrationAttachmentPodUID(vmi); found { ++ if ready, err := d.hotplugContainerDiskMounter.ContainerDisksReady(vmi, info.NotInitializedSince, uid); !ready { ++ if err != nil { ++ return err ++ } ++ d.Queue.AddAfter(controller.VirtualMachineInstanceKey(vmi), time.Second*1) ++ return nil ++ } ++ // Verify hotplug container disks checksum ++ err = container_disk.VerifyHotplugChecksums(d.hotplugContainerDiskMounter, vmi, uid) ++ switch { ++ case goerror.Is(err, container_disk.ErrChecksumMissing): ++ // wait for checksum to be computed by the source virt-handler ++ return err ++ case goerror.Is(err, container_disk.ErrChecksumMismatch): ++ log.Log.Object(vmi).Infof("HotplugContainerdisk checksum mismatch, terminating target pod: %s", err) ++ d.recorder.Event(vmi, k8sv1.EventTypeNormal, "HotplugContainerDiskFailedChecksum", "Aborting migration as the source and target containerdisks do not match") ++ return client.SignalTargetPodCleanup(vmi) ++ case err != nil: ++ return err ++ } ++ } ++ + // Mount container disks + disksInfo, err := d.containerDiskMounter.MountAndVerify(vmi) + if err != nil { + return err + } ++ if uid, found := container_disk.GetMigrationAttachmentPodUID(vmi); found { ++ hotplugDiskInfo, err := d.hotplugContainerDiskMounter.MoundAndVerifyFromPod(vmi, uid) ++ if err != nil { ++ return err ++ } ++ maps.Copy(disksInfo, hotplugDiskInfo) ++ } + + // Mount hotplug disks + if attachmentPodUID := vmi.Status.MigrationState.TargetAttachmentPodUID; attachmentPodUID != types.UID("") { +@@ -3051,6 
+3115,13 @@ func (d *VirtualMachineController) vmUpdateHelperDefault(origVMI *v1.VirtualMach + if err != nil { + return err + } ++ if ready, err := d.hotplugContainerDiskMounter.ContainerDisksReady(vmi, info.NotInitializedSince, ""); ready && err != nil { ++ hotplugDiskInfo, err := d.hotplugContainerDiskMounter.MountAndVerify(vmi) ++ if err != nil { ++ return err ++ } ++ maps.Copy(disksInfo, hotplugDiskInfo) ++ } + + // Try to mount hotplug volume if there is any during startup. + if err := d.hotplugVolumeMounter.Mount(vmi, cgroupManager); err != nil { +@@ -3138,6 +3209,11 @@ func (d *VirtualMachineController) vmUpdateHelperDefault(origVMI *v1.VirtualMach + log.Log.Object(vmi).Error(err.Error()) + } + ++ hotplugDiskInfo, err := d.hotplugContainerDiskMounter.MountAndVerify(vmi) ++ if err != nil { ++ return err ++ } ++ maps.Copy(disksInfo, hotplugDiskInfo) + if err := d.hotplugVolumeMounter.Mount(vmi, cgroupManager); err != nil { + return err + } +@@ -3215,6 +3291,9 @@ func (d *VirtualMachineController) vmUpdateHelperDefault(origVMI *v1.VirtualMach + + if vmi.IsRunning() { + // Umount any disks no longer mounted ++ if err := d.hotplugContainerDiskMounter.Umount(vmi); err != nil { ++ return err ++ } + if err := d.hotplugVolumeMounter.Unmount(vmi, cgroupManager); err != nil { + return err + } +diff --git a/pkg/virt-launcher/virtwrap/converter/converter.go b/pkg/virt-launcher/virtwrap/converter/converter.go +index 3318c1c466..393415c36c 100644 +--- a/pkg/virt-launcher/virtwrap/converter/converter.go ++++ b/pkg/virt-launcher/virtwrap/converter/converter.go +@@ -649,6 +649,9 @@ func Convert_v1_Hotplug_Volume_To_api_Disk(source *v1.Volume, disk *api.Disk, c + if source.DataVolume != nil { + return Convert_v1_Hotplug_DataVolume_To_api_Disk(source.Name, disk, c) + } ++ if source.ContainerDisk != nil { ++ return Convert_v1_Hotplug_ContainerDisk_To_api_Disk(source.Name, disk, c) ++ } + return fmt.Errorf("hotplug disk %s references an unsupported source", disk.Alias.GetName()) + } + +@@ -690,6 +693,10 @@ func GetHotplugBlockDeviceVolumePath(volumeName string) string { + return filepath.Join(string(filepath.Separator), "var", "run", "kubevirt", "hotplug-disks", volumeName) + } + ++func GetHotplugContainerDiskPath(volumeName string) string { ++ return filepath.Join(string(filepath.Separator), "var", "run", "kubevirt", "hotplug-disks", fmt.Sprintf("%s.img", volumeName)) ++} ++ + func Convert_v1_PersistentVolumeClaim_To_api_Disk(name string, disk *api.Disk, c *ConverterContext) error { + if c.IsBlockPVC[name] { + return Convert_v1_BlockVolumeSource_To_api_Disk(name, disk, c.VolumesDiscardIgnore) +@@ -768,6 +775,37 @@ func Convert_v1_Hotplug_BlockVolumeSource_To_api_Disk(volumeName string, disk *a + return nil + } + ++var ErrHotplugContainerDiskInfoNotProvided = errors.New("hotplug container disk info not provided") ++ ++func Convert_v1_Hotplug_ContainerDisk_To_api_Disk(volumeName string, disk *api.Disk, c *ConverterContext) error { ++ if disk.Type == "lun" { ++ return fmt.Errorf(deviceTypeNotCompatibleFmt, disk.Alias.GetName()) ++ } ++ info := c.DisksInfo[volumeName] ++ if info == nil { ++ return fmt.Errorf("no disk info provided for volume %s: %w", volumeName, ErrHotplugContainerDiskInfoNotProvided) ++ } ++ ++ disk.Type = "file" ++ disk.Driver.Type = info.Format ++ disk.Driver.ErrorPolicy = v1.DiskErrorPolicyStop ++ disk.ReadOnly = toApiReadOnly(true) ++ if !contains(c.VolumesDiscardIgnore, volumeName) { ++ disk.Driver.Discard = "unmap" ++ } ++ disk.Source.File = GetHotplugContainerDiskPath(volumeName) ++ 
disk.BackingStore = &api.BackingStore{ ++ Format: &api.BackingStoreFormat{}, ++ Source: &api.DiskSource{}, ++ } ++ ++ //disk.BackingStore.Format.Type = info.Format ++ //disk.BackingStore.Source.File = info.BackingFile ++ //disk.BackingStore.Type = "file" ++ ++ return nil ++} ++ + func Convert_v1_HostDisk_To_api_Disk(volumeName string, path string, disk *api.Disk) error { + disk.Type = "file" + disk.Driver.Type = "raw" +@@ -1581,6 +1619,10 @@ func Convert_v1_VirtualMachineInstance_To_api_Domain(vmi *v1.VirtualMachineInsta + err = Convert_v1_Volume_To_api_Disk(volume, &newDisk, c, volumeIndices[disk.Name]) + } else { + err = Convert_v1_Hotplug_Volume_To_api_Disk(volume, &newDisk, c) ++ if errors.Is(err, ErrHotplugContainerDiskInfoNotProvided) { ++ log.DefaultLogger().Warningf("Hotplug container disk info not provided. Skip hotplug disk %s", disk.Name) ++ continue ++ } + } + if err != nil { + return err +diff --git a/pkg/virt-launcher/virtwrap/live-migration-source.go b/pkg/virt-launcher/virtwrap/live-migration-source.go +index d8b777e5fb..f580d06e52 100644 +--- a/pkg/virt-launcher/virtwrap/live-migration-source.go ++++ b/pkg/virt-launcher/virtwrap/live-migration-source.go +@@ -289,8 +289,10 @@ func classifyVolumesForMigration(vmi *v1.VirtualMachineInstance) *migrationDisks + + case volSrc.ConfigMap != nil || volSrc.Secret != nil || volSrc.DownwardAPI != nil || + volSrc.ServiceAccount != nil || volSrc.CloudInitNoCloud != nil || +- volSrc.CloudInitConfigDrive != nil || volSrc.ContainerDisk != nil: ++ volSrc.CloudInitConfigDrive != nil || (volSrc.ContainerDisk != nil && !volSrc.ContainerDisk.Hotpluggable): + disks.generated[volume.Name] = true ++ case volSrc.ContainerDisk != nil && volSrc.ContainerDisk.Hotpluggable: ++ disks.shared[volume.Name] = true + } + } + +diff --git a/pkg/virt-launcher/virtwrap/manager.go b/pkg/virt-launcher/virtwrap/manager.go +index 4a1d22de46..cddee4f199 100644 +--- a/pkg/virt-launcher/virtwrap/manager.go ++++ b/pkg/virt-launcher/virtwrap/manager.go +@@ -32,6 +32,7 @@ import ( + "errors" + "fmt" + "io" ++ "maps" + "os" + "os/exec" + "path/filepath" +@@ -1029,9 +1030,7 @@ func (l *LibvirtDomainManager) generateConverterContext(vmi *v1.VirtualMachineIn + // Add preallocated and thick-provisioned volumes for which we need to avoid the discard=unmap option + c.VolumesDiscardIgnore = options.PreallocatedVolumes + +- if len(options.DisksInfo) > 0 { +- l.disksInfo = options.DisksInfo +- } ++ maps.Copy(l.disksInfo, options.DisksInfo) + + if options.GetClusterConfig() != nil { + c.ExpandDisksEnabled = options.GetClusterConfig().GetExpandDisksEnabled() +diff --git a/pkg/virt-operator/resource/apply/BUILD.bazel b/pkg/virt-operator/resource/apply/BUILD.bazel +index f6bd9bd4f1..fe6ab54f8c 100644 +--- a/pkg/virt-operator/resource/apply/BUILD.bazel ++++ b/pkg/virt-operator/resource/apply/BUILD.bazel +@@ -4,7 +4,6 @@ go_library( + name = "go_default_library", + srcs = [ + "admissionregistration.go", +- "apiservices.go", + "apps.go", + "certificates.go", + "core.go", +@@ -65,7 +64,6 @@ go_library( + "//vendor/k8s.io/client-go/tools/cache:go_default_library", + "//vendor/k8s.io/client-go/tools/record:go_default_library", + "//vendor/k8s.io/client-go/util/workqueue:go_default_library", +- "//vendor/k8s.io/kube-aggregator/pkg/apis/apiregistration/v1:go_default_library", + "//vendor/k8s.io/utils/pointer:go_default_library", + ], + ) +diff --git a/pkg/virt-operator/resource/generate/components/BUILD.bazel b/pkg/virt-operator/resource/generate/components/BUILD.bazel +index 
70d2da0897..affcd3fecd 100644 +--- a/pkg/virt-operator/resource/generate/components/BUILD.bazel ++++ b/pkg/virt-operator/resource/generate/components/BUILD.bazel +@@ -3,7 +3,6 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + go_library( + name = "go_default_library", + srcs = [ +- "apiservices.go", + "crds.go", + "daemonsets.go", + "deployments.go", +@@ -62,7 +61,6 @@ go_library( + "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/yaml:go_default_library", +- "//vendor/k8s.io/kube-aggregator/pkg/apis/apiregistration/v1:go_default_library", + "//vendor/k8s.io/utils/pointer:go_default_library", + ], + ) +@@ -70,7 +68,6 @@ go_library( + go_test( + name = "go_default_test", + srcs = [ +- "apiservices_test.go", + "components_suite_test.go", + "crds_test.go", + "deployments_test.go", +@@ -85,7 +82,6 @@ go_test( + deps = [ + "//pkg/certificates/bootstrap:go_default_library", + "//pkg/certificates/triple/cert:go_default_library", +- "//staging/src/kubevirt.io/api/core/v1:go_default_library", + "//staging/src/kubevirt.io/client-go/testutils:go_default_library", + "//vendor/github.com/onsi/ginkgo/v2:go_default_library", + "//vendor/github.com/onsi/gomega:go_default_library", +diff --git a/pkg/virt-operator/resource/generate/components/validations_generated.go b/pkg/virt-operator/resource/generate/components/validations_generated.go +index 4913dbead0..42225780ba 100644 +--- a/pkg/virt-operator/resource/generate/components/validations_generated.go ++++ b/pkg/virt-operator/resource/generate/components/validations_generated.go +@@ -7723,6 +7723,8 @@ var CRDsValidation map[string]string = map[string]string{ + ContainerDisk references a docker image, embedding a qcow or raw disk. + More info: https://kubevirt.gitbooks.io/user-guide/registry-disk.html + properties: ++ hotpluggable: ++ type: boolean + image: + description: Image is the name of the image with the embedded + disk. +@@ -8355,6 +8357,35 @@ var CRDsValidation map[string]string = map[string]string{ + description: VolumeSource represents the source of the volume + to map to the disk. + properties: ++ containerDisk: ++ description: Represents a docker image with an embedded disk. ++ properties: ++ hotpluggable: ++ type: boolean ++ image: ++ description: Image is the name of the image with the embedded ++ disk. ++ type: string ++ imagePullPolicy: ++ description: |- ++ Image pull policy. ++ One of Always, Never, IfNotPresent. ++ Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. ++ Cannot be updated. ++ More info: https://kubernetes.io/docs/concepts/containers/images#updating-images ++ type: string ++ imagePullSecret: ++ description: ImagePullSecret is the name of the Docker ++ registry secret required to pull the image. The secret ++ must already exist. ++ type: string ++ path: ++ description: Path defines the path to disk file in the ++ container ++ type: string ++ required: ++ - image ++ type: object + dataVolume: + description: |- + DataVolume represents the dynamic creation a PVC for this volume as well as +@@ -12768,6 +12799,8 @@ var CRDsValidation map[string]string = map[string]string{ + ContainerDisk references a docker image, embedding a qcow or raw disk. + More info: https://kubevirt.gitbooks.io/user-guide/registry-disk.html + properties: ++ hotpluggable: ++ type: boolean + image: + description: Image is the name of the image with the embedded + disk. 
+@@ -18328,6 +18361,8 @@ var CRDsValidation map[string]string = map[string]string{ + ContainerDisk references a docker image, embedding a qcow or raw disk. + More info: https://kubevirt.gitbooks.io/user-guide/registry-disk.html + properties: ++ hotpluggable: ++ type: boolean + image: + description: Image is the name of the image with the embedded + disk. +@@ -22835,6 +22870,8 @@ var CRDsValidation map[string]string = map[string]string{ + ContainerDisk references a docker image, embedding a qcow or raw disk. + More info: https://kubevirt.gitbooks.io/user-guide/registry-disk.html + properties: ++ hotpluggable: ++ type: boolean + image: + description: Image is the name of the image with + the embedded disk. +@@ -28015,6 +28052,8 @@ var CRDsValidation map[string]string = map[string]string{ + ContainerDisk references a docker image, embedding a qcow or raw disk. + More info: https://kubevirt.gitbooks.io/user-guide/registry-disk.html + properties: ++ hotpluggable: ++ type: boolean + image: + description: Image is the name of the image + with the embedded disk. +@@ -28673,6 +28712,36 @@ var CRDsValidation map[string]string = map[string]string{ + description: VolumeSource represents the source of + the volume to map to the disk. + properties: ++ containerDisk: ++ description: Represents a docker image with an ++ embedded disk. ++ properties: ++ hotpluggable: ++ type: boolean ++ image: ++ description: Image is the name of the image ++ with the embedded disk. ++ type: string ++ imagePullPolicy: ++ description: |- ++ Image pull policy. ++ One of Always, Never, IfNotPresent. ++ Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. ++ Cannot be updated. ++ More info: https://kubernetes.io/docs/concepts/containers/images#updating-images ++ type: string ++ imagePullSecret: ++ description: ImagePullSecret is the name of ++ the Docker registry secret required to pull ++ the image. The secret must already exist. 
++ type: string ++ path: ++ description: Path defines the path to disk ++ file in the container ++ type: string ++ required: ++ - image ++ type: object + dataVolume: + description: |- + DataVolume represents the dynamic creation a PVC for this volume as well as +diff --git a/pkg/virt-operator/resource/generate/install/generated_mock_strategy.go b/pkg/virt-operator/resource/generate/install/generated_mock_strategy.go +index 5f1e9a3121..1fa1416af0 100644 +--- a/pkg/virt-operator/resource/generate/install/generated_mock_strategy.go ++++ b/pkg/virt-operator/resource/generate/install/generated_mock_strategy.go +@@ -241,16 +241,6 @@ func (_mr *_MockStrategyInterfaceRecorder) MutatingWebhookConfigurations() *gomo + return _mr.mock.ctrl.RecordCall(_mr.mock, "MutatingWebhookConfigurations") + } + +-func (_m *MockStrategyInterface) APIServices() []*v18.APIService { +- ret := _m.ctrl.Call(_m, "APIServices") +- ret0, _ := ret[0].([]*v18.APIService) +- return ret0 +-} +- +-func (_mr *_MockStrategyInterfaceRecorder) APIServices() *gomock.Call { +- return _mr.mock.ctrl.RecordCall(_mr.mock, "APIServices") +-} +- + func (_m *MockStrategyInterface) CertificateSecrets() []*v14.Secret { + ret := _m.ctrl.Call(_m, "CertificateSecrets") + ret0, _ := ret[0].([]*v14.Secret) +diff --git a/pkg/virt-operator/resource/generate/rbac/exportproxy.go b/pkg/virt-operator/resource/generate/rbac/exportproxy.go +index ebc9f2adbd..a0dc0586b4 100644 +--- a/pkg/virt-operator/resource/generate/rbac/exportproxy.go ++++ b/pkg/virt-operator/resource/generate/rbac/exportproxy.go +@@ -23,6 +23,7 @@ import ( + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" ++ + "kubevirt.io/kubevirt/pkg/virt-operator/resource/generate/components" + + virtv1 "kubevirt.io/api/core/v1" +diff --git a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.json b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.json +index b651173636..3453dfb0da 100644 +--- a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.json ++++ b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.json +@@ -754,7 +754,8 @@ + "image": "imageValue", + "imagePullSecret": "imagePullSecretValue", + "path": "pathValue", +- "imagePullPolicy": "imagePullPolicyValue" ++ "imagePullPolicy": "imagePullPolicyValue", ++ "hotpluggable": true + }, + "ephemeral": { + "persistentVolumeClaim": { +@@ -1209,6 +1210,13 @@ + "dataVolume": { + "name": "nameValue", + "hotpluggable": true ++ }, ++ "containerDisk": { ++ "image": "imageValue", ++ "imagePullSecret": "imagePullSecretValue", ++ "path": "pathValue", ++ "imagePullPolicy": "imagePullPolicyValue", ++ "hotpluggable": true + } + }, + "dryRun": [ +diff --git a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.yaml b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.yaml +index 53dfdacc3b..8b23193158 100644 +--- a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.yaml ++++ b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachine.yaml +@@ -719,6 +719,7 @@ spec: + optional: true + volumeLabel: volumeLabelValue + containerDisk: ++ hotpluggable: true + image: imageValue + imagePullPolicy: imagePullPolicyValue + imagePullSecret: imagePullSecretValue +@@ -838,6 +839,12 @@ status: + - dryRunValue + name: nameValue + volumeSource: ++ containerDisk: ++ 
hotpluggable: true ++ image: imageValue ++ imagePullPolicy: imagePullPolicyValue ++ imagePullSecret: imagePullSecretValue ++ path: pathValue + dataVolume: + hotpluggable: true + name: nameValue +diff --git a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.json b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.json +index 3be904512c..f595798e89 100644 +--- a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.json ++++ b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.json +@@ -694,7 +694,8 @@ + "image": "imageValue", + "imagePullSecret": "imagePullSecretValue", + "path": "pathValue", +- "imagePullPolicy": "imagePullPolicyValue" ++ "imagePullPolicy": "imagePullPolicyValue", ++ "hotpluggable": true + }, + "ephemeral": { + "persistentVolumeClaim": { +diff --git a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.yaml b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.yaml +index 6fd2ab6523..b6457ec94d 100644 +--- a/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.yaml ++++ b/staging/src/kubevirt.io/api/apitesting/testdata/HEAD/kubevirt.io.v1.VirtualMachineInstance.yaml +@@ -524,6 +524,7 @@ spec: + optional: true + volumeLabel: volumeLabelValue + containerDisk: ++ hotpluggable: true + image: imageValue + imagePullPolicy: imagePullPolicyValue + imagePullSecret: imagePullSecretValue +diff --git a/staging/src/kubevirt.io/api/core/v1/BUILD.bazel b/staging/src/kubevirt.io/api/core/v1/BUILD.bazel +index f8615293a3..0c6c166985 100644 +--- a/staging/src/kubevirt.io/api/core/v1/BUILD.bazel ++++ b/staging/src/kubevirt.io/api/core/v1/BUILD.bazel +@@ -28,7 +28,6 @@ go_library( + "//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", + "//vendor/k8s.io/utils/net:go_default_library", +- "//vendor/k8s.io/utils/pointer:go_default_library", + "//vendor/kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1:go_default_library", + ], + ) +diff --git a/staging/src/kubevirt.io/api/core/v1/deepcopy_generated.go b/staging/src/kubevirt.io/api/core/v1/deepcopy_generated.go +index abd5a495d6..7372b22a9a 100644 +--- a/staging/src/kubevirt.io/api/core/v1/deepcopy_generated.go ++++ b/staging/src/kubevirt.io/api/core/v1/deepcopy_generated.go +@@ -1948,6 +1948,11 @@ func (in *HotplugVolumeSource) DeepCopyInto(out *HotplugVolumeSource) { + *out = new(DataVolumeSource) + **out = **in + } ++ if in.ContainerDisk != nil { ++ in, out := &in.ContainerDisk, &out.ContainerDisk ++ *out = new(ContainerDiskSource) ++ **out = **in ++ } + return + } + +diff --git a/staging/src/kubevirt.io/api/core/v1/schema.go b/staging/src/kubevirt.io/api/core/v1/schema.go +index 29aa3932d3..302ed9ffde 100644 +--- a/staging/src/kubevirt.io/api/core/v1/schema.go ++++ b/staging/src/kubevirt.io/api/core/v1/schema.go +@@ -854,6 +854,8 @@ type HotplugVolumeSource struct { + // the process of populating that PVC with a disk image. 
+ // +optional
+ DataVolume *DataVolumeSource `json:"dataVolume,omitempty"`
++
++ ContainerDisk *ContainerDiskSource `json:"containerDisk,omitempty"`
+ }
+
+ type DataVolumeSource struct {
+@@ -911,6 +913,8 @@ type ContainerDiskSource struct {
+ // More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
+ // +optional
+ ImagePullPolicy v1.PullPolicy `json:"imagePullPolicy,omitempty"`
++
++ Hotpluggable bool `json:"hotpluggable,omitempty"`
+ }
+
+ // Exactly one of its members must be set.
+diff --git a/staging/src/kubevirt.io/client-go/api/openapi_generated.go b/staging/src/kubevirt.io/client-go/api/openapi_generated.go
+index cc2d743492..b982b1620c 100644
+--- a/staging/src/kubevirt.io/client-go/api/openapi_generated.go
++++ b/staging/src/kubevirt.io/client-go/api/openapi_generated.go
+@@ -17772,6 +17772,12 @@ func schema_kubevirtio_api_core_v1_ContainerDiskSource(ref common.ReferenceCallb
+ Enum: []interface{}{"Always", "IfNotPresent", "Never"},
+ },
+ },
++ "hotpluggable": {
++ SchemaProps: spec.SchemaProps{
++ Type: []string{"boolean"},
++ Format: "",
++ },
++ },
+ },
+ Required: []string{"image"},
+ },
+@@ -19645,11 +19651,16 @@ func schema_kubevirtio_api_core_v1_HotplugVolumeSource(ref common.ReferenceCallb
+ Ref: ref("kubevirt.io/api/core/v1.DataVolumeSource"),
+ },
+ },
++ "containerDisk": {
++ SchemaProps: spec.SchemaProps{
++ Ref: ref("kubevirt.io/api/core/v1.ContainerDiskSource"),
++ },
++ },
+ },
+ },
+ },
+ Dependencies: []string{
+- "kubevirt.io/api/core/v1.DataVolumeSource", "kubevirt.io/api/core/v1.PersistentVolumeClaimVolumeSource"},
++ "kubevirt.io/api/core/v1.ContainerDiskSource", "kubevirt.io/api/core/v1.DataVolumeSource", "kubevirt.io/api/core/v1.PersistentVolumeClaimVolumeSource"},
+ }
+ }
+
diff --git a/images/virt-artifact/patches/README.md b/images/virt-artifact/patches/README.md
index 00dd4f5c3..ee1d8b3c9 100644
--- a/images/virt-artifact/patches/README.md
+++ b/images/virt-artifact/patches/README.md
@@ -140,3 +140,47 @@ d8-cni-cilium ensures that once the label is removed from the target pod, only t
 - Do not add cpu-model nodeSelector for "kvm64" model. This selector prevents starting VMs as node-labeler ignores to labeling nodes with "kvm64" model.
 - Overwrite calculated model on migration, put back "kvm64" for Discovery and Features vmclass types.
+
+---
+#### `032-hotplug-container-disk.patch`
+
+Add Hotplug container-disk volumes.
+##### How `container-disk` and HotPlug Work
+`container-disk` is a program written in C that KubeVirt uses to mount container-based disk images into virtual machines. Its core function is to start up and create a UNIX socket within a specific directory. The program terminates when the socket is removed or upon receiving a `SIGTERM` signal.
+
+##### Key Workflow: `container-disk`
+
+##### Initialization
+- A sidecar container running the `container-disk` image is created in the `virt-launcher` pod.
+- An init-container in the `virt-launcher` pod copies the `container-disk` program to a shared `emptyDir` volume. This setup allows the sidecar to execute the program.
+
+##### Socket Creation
+
+- The `container-disk` program creates a socket in the `emptyDir` volume.
+- This shared volume allows the `virt-handler` to locate the socket on the host machine at:
+  `/var/lib/kubelet/pods/.../volumes/kubernetes.io~empty-dir/`.
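For illustration only, the lifecycle described above (create a UNIX socket, then block until the socket disappears or `SIGTERM` arrives) can be approximated by the following Go sketch. It is not the actual C implementation shipped in the `container-disk` image, and the socket path is hypothetical.

```go
package main

import (
	"net"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	// Hypothetical location inside the shared emptyDir volume.
	const socketPath = "/shared/container-disk.sock"

	// Create the UNIX socket that virt-handler will later discover.
	listener, err := net.Listen("unix", socketPath)
	if err != nil {
		panic(err)
	}
	defer listener.Close()

	// Exit on SIGTERM, mirroring the behaviour described above.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGTERM)

	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-sigCh:
			return
		case <-ticker.C:
			// Exit once the socket file has been removed.
			if _, err := os.Stat(socketPath); os.IsNotExist(err) {
				return
			}
		}
	}
}
```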
+
+##### Socket Detection and Mounting
+
+- Upon detecting the socket, `virt-handler` identifies it as a `container-disk` volume and retrieves its parent mount point.
+- For a container runtime like `containerd`, the mount point resolves to the root filesystem of the pulled image, typically at:
+  `/run/containerd/io.containerd.runtime.v2.task/k8s.io//rootfs/`.
+- The disk image must be located at `disk/disk.img` within this filesystem and is mounted into the VM.
+
+##### HotPlug in KubeVirt
+The HotPlug mechanism allows dynamic attachment of PVCs and `container-disk` volumes to a running VM by leveraging a separate `hotplug` pod.
+
+##### HotPlug Pod Setup
+- A `hotplug` pod is created with the target PVCs mounted into an `emptyDir` volume under the `/hp` directory.
+- The `container-disk` program runs in the `hotplug` pod to create the necessary sockets for these volumes.
+
+##### Volume Detection and Mounting
+- The `virt-handler` locates the sockets on the host system at:
+  `/var/lib/kubelet/pods//volumes/empty-dir/hp-disks/...`.
+- For block devices, `virt-handler` creates a block device on the VM using `mknodat`.
+- For file systems, the volume is mounted as a file.
+
+##### Unmounting
+- The unmount process is identical to that of hotplug PVCs.
+- The `emptyDir` resources are retained and cleaned up later by Kubernetes.
+---
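To make the new API surface concrete, the sketch below builds the body of an add-volume request using the `containerDisk` member that this patch adds to `HotplugVolumeSource`. It is an illustrative example only: the volume name, image, and SCSI bus are arbitrary choices, and `AddVolumeOptions` comes from the upstream KubeVirt API rather than this diff.

```go
package main

import (
	"encoding/json"
	"fmt"

	v1 "kubevirt.io/api/core/v1"
)

// buildHotplugContainerDiskRequest assembles AddVolumeOptions that reference a
// container disk through the containerDisk field of HotplugVolumeSource.
func buildHotplugContainerDiskRequest(volumeName, image string) *v1.AddVolumeOptions {
	return &v1.AddVolumeOptions{
		Name: volumeName,
		Disk: &v1.Disk{
			Name: volumeName,
			DiskDevice: v1.DiskDevice{
				Disk: &v1.DiskTarget{Bus: v1.DiskBusSCSI},
			},
		},
		VolumeSource: &v1.HotplugVolumeSource{
			ContainerDisk: &v1.ContainerDiskSource{
				Image:        image, // image embedding the disk at disk/disk.img
				Hotpluggable: true,
			},
		},
	}
}

func main() {
	opts := buildHotplugContainerDiskRequest("extra-disk", "registry.example.com/vm-disk:latest")
	out, _ := json.MarshalIndent(opts, "", "  ")
	fmt.Println(string(out))
}
```

Setting `hotpluggable: true` on the container disk mirrors what the CRD validation changes above now allow for both spec volumes and hotplug volume requests.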