diff --git a/api/v1alpha1/lvm_logical_volume.go b/api/v1alpha1/lvm_logical_volume.go index 9d714a9a..1dc6af93 100644 --- a/api/v1alpha1/lvm_logical_volume.go +++ b/api/v1alpha1/lvm_logical_volume.go @@ -41,7 +41,7 @@ type LVMLogicalVolumeSpec struct { Type string `json:"type"` Size string `json:"size"` LVMVolumeGroupName string `json:"lvmVolumeGroupName"` - Source string `json:"source"` + Source *LVMLogicalVolumeSource `json:"source"` Thin *LVMLogicalVolumeThinSpec `json:"thin"` Thick *LVMLogicalVolumeThickSpec `json:"thick"` } @@ -59,3 +59,9 @@ type LVMLogicalVolumeStatus struct { ActualSize resource.Quantity `json:"actualSize"` Contiguous *bool `json:"contiguous"` } + +type LVMLogicalVolumeSource struct { + // Either LVMLogicalVolume or LVMLogicalVolumeSnapshot + Kind string `json:"kind"` + Name string `json:"name"` +} diff --git a/images/agent/src/cmd/main.go b/images/agent/src/cmd/main.go index c4b8c793..1a8e81bb 100644 --- a/images/agent/src/cmd/main.go +++ b/images/agent/src/cmd/main.go @@ -36,10 +36,12 @@ import ( "agent/config" "agent/pkg/cache" "agent/pkg/controller" + "agent/pkg/controller/bd" "agent/pkg/kubutils" "agent/pkg/logger" "agent/pkg/monitoring" "agent/pkg/scanner" + "agent/pkg/utils" ) var ( @@ -110,15 +112,27 @@ func main() { metrics := monitoring.GetMetrics(cfgParams.NodeName) log.Info("[main] ReTag starts") - err = controller.ReTag(ctx, *log, metrics) - if err != nil { + if err := utils.ReTag(ctx, *log, metrics, bd.Name); err != nil { log.Error(err, "[main] unable to run ReTag") } - log.Info("[main] ReTag ends") sdsCache := cache.New() - bdCtrl, err := controller.RunBlockDeviceController(mgr, *cfgParams, *log, metrics, sdsCache) + rediscoverBlockDevices, err := controller.AddDiscoverer( + mgr, + *log, + bd.NewDiscoverer( + mgr.GetClient(), + *log, + metrics, + sdsCache, + bd.Options{ + NodeName: cfgParams.NodeName, + MachineID: cfgParams.MachineID, + BlockDeviceScanInterval: cfgParams.BlockDeviceScanIntervalSec, + }, + ), + ) if err != nil { log.Error(err, "[main] unable to controller.RunBlockDeviceController") os.Exit(1) @@ -136,7 +150,7 @@ func main() { } go func() { - if err = scanner.RunScanner(ctx, *log, *cfgParams, sdsCache, bdCtrl, lvgDiscoverCtrl); err != nil { + if err = scanner.RunScanner(ctx, *log, *cfgParams, sdsCache, rediscoverBlockDevices, lvgDiscoverCtrl); err != nil { log.Error(err, "[main] unable to run scanner") os.Exit(1) } diff --git a/images/agent/src/go.mod b/images/agent/src/go.mod index 94f92ebe..3e1d77d6 100644 --- a/images/agent/src/go.mod +++ b/images/agent/src/go.mod @@ -6,6 +6,7 @@ require ( github.com/deckhouse/sds-node-configurator/api v0.0.0-20240926063625-6815fd9556ea github.com/go-logr/logr v1.4.2 github.com/google/go-cmp v0.6.0 + github.com/gosimple/slug v1.14.0 github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/gomega v1.33.1 github.com/pilebones/go-udev v0.9.0 @@ -13,7 +14,7 @@ require ( github.com/stretchr/testify v1.9.0 k8s.io/api v0.31.0 k8s.io/apiextensions-apiserver v0.31.0 - k8s.io/apimachinery v0.31.0 + k8s.io/apimachinery v0.31.1 k8s.io/client-go v0.31.0 k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 @@ -42,7 +43,6 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af // indirect github.com/google/uuid v1.6.0 // indirect - github.com/gosimple/slug v1.14.0 // indirect github.com/gosimple/unidecode v1.0.1 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect diff --git 
a/images/agent/src/go.sum b/images/agent/src/go.sum index 28ef6755..2098bcc9 100644 --- a/images/agent/src/go.sum +++ b/images/agent/src/go.sum @@ -181,8 +181,8 @@ k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo= k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE= k8s.io/apiextensions-apiserver v0.31.0 h1:fZgCVhGwsclj3qCw1buVXCV6khjRzKC5eCFt24kyLSk= k8s.io/apiextensions-apiserver v0.31.0/go.mod h1:b9aMDEYaEe5sdK+1T0KU78ApR/5ZVp4i56VacZYEHxk= -k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc= -k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= +k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8= k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= diff --git a/images/agent/src/pkg/controller/bd/discoverer.go b/images/agent/src/pkg/controller/bd/discoverer.go new file mode 100644 index 00000000..9787adc7 --- /dev/null +++ b/images/agent/src/pkg/controller/bd/discoverer.go @@ -0,0 +1,653 @@ +package bd + +import ( + "agent/internal" + "agent/pkg/cache" + "agent/pkg/controller" + "agent/pkg/logger" + "agent/pkg/monitoring" + "context" + "crypto/sha1" + "fmt" + "os" + "reflect" + "regexp" + "strconv" + "strings" + "time" + + "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/gosimple/slug" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const Name = "block-device-controller" + +type Discoverer struct { + cl client.Client + log logger.Logger + metrics monitoring.Metrics + sdsCache *cache.Cache + opts Options +} + +type Options struct { + BlockDeviceScanInterval time.Duration + MachineID string + NodeName string +} + +func NewDiscoverer( + cl client.Client, + log logger.Logger, + metrics monitoring.Metrics, + sdsCache *cache.Cache, + opts Options, +) *Discoverer { + return &Discoverer{ + cl: cl, + log: log, + metrics: metrics, + sdsCache: sdsCache, + opts: opts, + } +} + +func (d *Discoverer) Name() string { + return Name +} + +func (d *Discoverer) Discover(ctx context.Context) (controller.Result, error) { + d.log.Info("[RunBlockDeviceController] Reconciler starts BlockDevice resources reconciliation") + + shouldRequeue := d.blockDeviceReconcile(ctx) + if shouldRequeue { + d.log.Warning(fmt.Sprintf("[RunBlockDeviceController] Reconciler needs a retry in %f", d.opts.BlockDeviceScanInterval.Seconds())) + return controller.Result{RequeueAfter: d.opts.BlockDeviceScanInterval}, nil + } + d.log.Info("[RunBlockDeviceController] Reconciler successfully ended BlockDevice resources reconciliation") + return controller.Result{}, nil +} + +func (d *Discoverer) blockDeviceReconcile(ctx context.Context) bool { + reconcileStart := time.Now() + + d.log.Info("[RunBlockDeviceController] START reconcile of block devices") + + candidates := d.getBlockDeviceCandidates() + if len(candidates) == 0 { + d.log.Info("[RunBlockDeviceController] no block devices candidates found. 
Stop reconciliation") + return false + } + + apiBlockDevices, err := d.getAPIBlockDevices(ctx, nil) + if err != nil { + d.log.Error(err, "[RunBlockDeviceController] unable to GetAPIBlockDevices") + return true + } + + if len(apiBlockDevices) == 0 { + d.log.Debug("[RunBlockDeviceController] no BlockDevice resources were found") + } + + // create new API devices + for _, candidate := range candidates { + blockDevice, exist := apiBlockDevices[candidate.Name] + if exist { + if !hasBlockDeviceDiff(blockDevice, candidate) { + d.log.Debug(fmt.Sprintf(`[RunBlockDeviceController] no data to update for block device, name: "%s"`, candidate.Name)) + continue + } + + if err = d.updateAPIBlockDevice(ctx, blockDevice, candidate); err != nil { + d.log.Error(err, "[RunBlockDeviceController] unable to update blockDevice, name: %s", blockDevice.Name) + continue + } + + d.log.Info(fmt.Sprintf(`[RunBlockDeviceController] updated APIBlockDevice, name: %s`, blockDevice.Name)) + continue + } + + device, err := d.createAPIBlockDevice(ctx, candidate) + if err != nil { + d.log.Error(err, fmt.Sprintf("[RunBlockDeviceController] unable to create block device blockDevice, name: %s", candidate.Name)) + continue + } + d.log.Info(fmt.Sprintf("[RunBlockDeviceController] created new APIBlockDevice: %s", candidate.Name)) + + // add new api device to the map, so it won't be deleted as fantom + apiBlockDevices[candidate.Name] = *device + } + + // delete api device if device no longer exists, but we still have its api resource + d.removeDeprecatedAPIDevices(ctx, candidates, apiBlockDevices) + + d.log.Info("[RunBlockDeviceController] END reconcile of block devices") + d.metrics.ReconcileDuration(Name).Observe(d.metrics.GetEstimatedTimeInSeconds(reconcileStart)) + d.metrics.ReconcilesCountTotal(Name).Inc() + + return false +} + +// getAPIBlockDevices returns map of BlockDevice resources with BlockDevice as a key. You might specify a selector to get a subset or +// leave it as nil to get all the resources. 
+func (d *Discoverer) getAPIBlockDevices( + ctx context.Context, + selector *metav1.LabelSelector, +) (map[string]v1alpha1.BlockDevice, error) { + list := &v1alpha1.BlockDeviceList{} + s, err := metav1.LabelSelectorAsSelector(selector) + if err != nil { + return nil, err + } + if s == labels.Nothing() { + s = nil + } + start := time.Now() + err = d.cl.List(ctx, list, &client.ListOptions{LabelSelector: s}) + d.metrics.APIMethodsDuration(Name, "list").Observe(d.metrics.GetEstimatedTimeInSeconds(start)) + d.metrics.APIMethodsExecutionCount(Name, "list").Inc() + if err != nil { + d.metrics.APIMethodsErrors(Name, "list").Inc() + return nil, err + } + + result := make(map[string]v1alpha1.BlockDevice, len(list.Items)) + for _, item := range list.Items { + result[item.Name] = item + } + + return result, nil +} + +func (d *Discoverer) removeDeprecatedAPIDevices( + ctx context.Context, + candidates []internal.BlockDeviceCandidate, + apiBlockDevices map[string]v1alpha1.BlockDevice, +) { + actualCandidates := make(map[string]struct{}, len(candidates)) + for _, candidate := range candidates { + actualCandidates[candidate.Name] = struct{}{} + } + + for name, device := range apiBlockDevices { + if shouldDeleteBlockDevice(device, actualCandidates, d.opts.NodeName) { + err := d.deleteAPIBlockDevice(ctx, &device) + if err != nil { + d.log.Error(err, fmt.Sprintf("[RunBlockDeviceController] unable to delete APIBlockDevice, name: %s", name)) + continue + } + + delete(apiBlockDevices, name) + d.log.Info(fmt.Sprintf("[RunBlockDeviceController] device deleted, name: %s", name)) + } + } +} + +func (d *Discoverer) getBlockDeviceCandidates() []internal.BlockDeviceCandidate { + var candidates []internal.BlockDeviceCandidate + devices, _ := d.sdsCache.GetDevices() + if len(devices) == 0 { + d.log.Debug("[GetBlockDeviceCandidates] no devices found, returns empty candidates") + return candidates + } + + filteredDevices, err := d.filterDevices(devices) + if err != nil { + d.log.Error(err, "[GetBlockDeviceCandidates] unable to filter devices") + return nil + } + + if len(filteredDevices) == 0 { + d.log.Debug("[GetBlockDeviceCandidates] no filtered devices left, returns empty candidates") + return candidates + } + + pvs, _ := d.sdsCache.GetPVs() + if len(pvs) == 0 { + d.log.Debug("[GetBlockDeviceCandidates] no PVs found") + } + + var delFlag bool + candidates = make([]internal.BlockDeviceCandidate, 0, len(filteredDevices)) + + for _, device := range filteredDevices { + d.log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] Process device: %+v", device)) + candidate := internal.BlockDeviceCandidate{ + NodeName: d.opts.NodeName, + Consumable: checkConsumable(device), + Wwn: device.Wwn, + Serial: device.Serial, + Path: device.Name, + Size: device.Size, + Rota: device.Rota, + Model: device.Model, + HotPlug: device.HotPlug, + KName: device.KName, + PkName: device.PkName, + Type: device.Type, + FSType: device.FSType, + MachineID: d.opts.MachineID, + PartUUID: device.PartUUID, + } + + d.log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] Get following candidate: %+v", candidate)) + candidateName := d.createCandidateName(candidate, devices) + + if candidateName == "" { + d.log.Trace("[GetBlockDeviceCandidates] candidateName is empty. 
Skipping device") + continue + } + + candidate.Name = candidateName + d.log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] Generated a unique candidate name: %s", candidate.Name)) + + delFlag = false + for _, pv := range pvs { + if pv.PVName == device.Name { + d.log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] The device is a PV. Found PV name: %s", pv.PVName)) + if candidate.FSType == internal.LVMFSType { + hasTag, lvmVGName := checkTag(pv.VGTags) + if hasTag { + d.log.Debug(fmt.Sprintf("[GetBlockDeviceCandidates] PV %s of BlockDevice %s has tag, fill the VG information", pv.PVName, candidate.Name)) + candidate.PVUuid = pv.PVUuid + candidate.VGUuid = pv.VGUuid + candidate.ActualVGNameOnTheNode = pv.VGName + candidate.LVMVolumeGroupName = lvmVGName + } else { + if len(pv.VGName) != 0 { + d.log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] The device is a PV with VG named %s that lacks our tag %s. Removing it from Kubernetes", pv.VGName, internal.LVMTags[0])) + delFlag = true + } else { + candidate.PVUuid = pv.PVUuid + } + } + } + } + } + d.log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] delFlag: %t", delFlag)) + if delFlag { + continue + } + d.log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] configured candidate %+v", candidate)) + candidates = append(candidates, candidate) + } + + return candidates +} + +func (d *Discoverer) filterDevices(devices []internal.Device) ([]internal.Device, error) { + d.log.Trace(fmt.Sprintf("[filterDevices] devices before type filtration: %+v", devices)) + + validTypes := make([]internal.Device, 0, len(devices)) + + for _, device := range devices { + if !strings.HasPrefix(device.Name, internal.DRBDName) && + hasValidType(device.Type) && + hasValidFSType(device.FSType) { + validTypes = append(validTypes, device) + } + } + + d.log.Trace(fmt.Sprintf("[filterDevices] devices after type filtration: %+v", validTypes)) + + pkNames := make(map[string]struct{}, len(validTypes)) + for _, device := range devices { + if device.PkName != "" { + d.log.Trace(fmt.Sprintf("[filterDevices] find parent %s for child : %+v.", device.PkName, device)) + pkNames[device.PkName] = struct{}{} + } + } + d.log.Trace(fmt.Sprintf("[filterDevices] pkNames: %+v", pkNames)) + + filtered := make([]internal.Device, 0, len(validTypes)) + for _, device := range validTypes { + if !isParent(device.KName, pkNames) || device.FSType == internal.LVMFSType { + validSize, err := hasValidSize(device.Size) + if err != nil { + return nil, err + } + + if validSize { + filtered = append(filtered, device) + } + } + } + + d.log.Trace(fmt.Sprintf("[filterDevices] final filtered devices: %+v", filtered)) + + return filtered, nil +} + +func (d *Discoverer) createCandidateName(candidate internal.BlockDeviceCandidate, devices []internal.Device) string { + if len(candidate.Serial) == 0 { + d.log.Trace(fmt.Sprintf("[CreateCandidateName] Serial number is empty for device: %s", candidate.Path)) + if candidate.Type == internal.PartType { + if len(candidate.PartUUID) == 0 { + d.log.Warning(fmt.Sprintf("[CreateCandidateName] Type = part and cannot get PartUUID; skipping this device, path: %s", candidate.Path)) + return "" + } + d.log.Trace(fmt.Sprintf("[CreateCandidateName] Type = part and PartUUID is not empty; skiping getting serial number for device: %s", candidate.Path)) + } else { + d.log.Debug(fmt.Sprintf("[CreateCandidateName] Serial number is empty and device type is not part; trying to obtain serial number or its equivalent for device: %s, with type: %s", candidate.Path, candidate.Type)) + + switch 
candidate.Type { + case internal.MultiPathType: + d.log.Debug(fmt.Sprintf("[CreateCandidateName] device %s type = %s; get serial number from parent device.", candidate.Path, candidate.Type)) + d.log.Trace(fmt.Sprintf("[CreateCandidateName] device: %+v. Device list: %+v", candidate, devices)) + serial, err := getSerialForMultipathDevice(candidate, devices) + if err != nil { + d.log.Warning(fmt.Sprintf("[CreateCandidateName] Unable to obtain serial number or its equivalent; skipping device: %s. Error: %s", candidate.Path, err)) + return "" + } + candidate.Serial = serial + d.log.Info(fmt.Sprintf("[CreateCandidateName] Successfully obtained serial number or its equivalent: %s for device: %s", candidate.Serial, candidate.Path)) + default: + isMdRaid := false + matched, err := regexp.MatchString(`raid.*`, candidate.Type) + if err != nil { + d.log.Error(err, "[CreateCandidateName] failed to match regex - unable to determine if the device is an mdraid. Attempting to retrieve serial number directly from the device") + } else if matched { + d.log.Trace("[CreateCandidateName] device is mdraid") + isMdRaid = true + } + serial, err := readSerialBlockDevice(candidate.Path, isMdRaid) + if err != nil { + d.log.Warning(fmt.Sprintf("[CreateCandidateName] Unable to obtain serial number or its equivalent; skipping device: %s. Error: %s", candidate.Path, err)) + return "" + } + d.log.Info(fmt.Sprintf("[CreateCandidateName] Successfully obtained serial number or its equivalent: %s for device: %s", serial, candidate.Path)) + candidate.Serial = serial + } + } + } + + d.log.Trace(fmt.Sprintf("[CreateCandidateName] Serial number is now: %s. Creating candidate name", candidate.Serial)) + return createUniqDeviceName(candidate) +} + +func (d *Discoverer) updateAPIBlockDevice( + ctx context.Context, + blockDevice v1alpha1.BlockDevice, + candidate internal.BlockDeviceCandidate, +) error { + blockDevice.Status = v1alpha1.BlockDeviceStatus{ + Type: candidate.Type, + FsType: candidate.FSType, + NodeName: candidate.NodeName, + Consumable: candidate.Consumable, + PVUuid: candidate.PVUuid, + VGUuid: candidate.VGUuid, + PartUUID: candidate.PartUUID, + LVMVolumeGroupName: candidate.LVMVolumeGroupName, + ActualVGNameOnTheNode: candidate.ActualVGNameOnTheNode, + Wwn: candidate.Wwn, + Serial: candidate.Serial, + Path: candidate.Path, + Size: *resource.NewQuantity(candidate.Size.Value(), resource.BinarySI), + Model: candidate.Model, + Rota: candidate.Rota, + HotPlug: candidate.HotPlug, + MachineID: candidate.MachineID, + } + + blockDevice.Labels = configureBlockDeviceLabels(blockDevice) + + start := time.Now() + err := d.cl.Update(ctx, &blockDevice) + d.metrics.APIMethodsDuration(Name, "update").Observe(d.metrics.GetEstimatedTimeInSeconds(start)) + d.metrics.APIMethodsExecutionCount(Name, "update").Inc() + if err != nil { + d.metrics.APIMethodsErrors(Name, "update").Inc() + return err + } + + return nil +} + +func (d *Discoverer) createAPIBlockDevice(ctx context.Context, candidate internal.BlockDeviceCandidate) (*v1alpha1.BlockDevice, error) { + blockDevice := &v1alpha1.BlockDevice{ + ObjectMeta: metav1.ObjectMeta{ + Name: candidate.Name, + }, + Status: v1alpha1.BlockDeviceStatus{ + Type: candidate.Type, + FsType: candidate.FSType, + NodeName: candidate.NodeName, + Consumable: candidate.Consumable, + PVUuid: candidate.PVUuid, + VGUuid: candidate.VGUuid, + PartUUID: candidate.PartUUID, + LVMVolumeGroupName: candidate.LVMVolumeGroupName, + ActualVGNameOnTheNode: candidate.ActualVGNameOnTheNode, + Wwn: candidate.Wwn, + Serial: 
candidate.Serial, + Path: candidate.Path, + Size: *resource.NewQuantity(candidate.Size.Value(), resource.BinarySI), + Model: candidate.Model, + Rota: candidate.Rota, + MachineID: candidate.MachineID, + }, + } + + blockDevice.Labels = configureBlockDeviceLabels(*blockDevice) + start := time.Now() + + err := d.cl.Create(ctx, blockDevice) + d.metrics.APIMethodsDuration(Name, "create").Observe(d.metrics.GetEstimatedTimeInSeconds(start)) + d.metrics.APIMethodsExecutionCount(Name, "create").Inc() + if err != nil { + d.metrics.APIMethodsErrors(Name, "create").Inc() + return nil, err + } + return blockDevice, nil +} + +func (d *Discoverer) deleteAPIBlockDevice(ctx context.Context, device *v1alpha1.BlockDevice) error { + start := time.Now() + err := d.cl.Delete(ctx, device) + d.metrics.APIMethodsDuration(Name, "delete").Observe(d.metrics.GetEstimatedTimeInSeconds(start)) + d.metrics.APIMethodsExecutionCount(Name, "delete").Inc() + if err != nil { + d.metrics.APIMethodsErrors(Name, "delete").Inc() + return err + } + return nil +} + +func hasBlockDeviceDiff(blockDevice v1alpha1.BlockDevice, candidate internal.BlockDeviceCandidate) bool { + return candidate.NodeName != blockDevice.Status.NodeName || + candidate.Consumable != blockDevice.Status.Consumable || + candidate.PVUuid != blockDevice.Status.PVUuid || + candidate.VGUuid != blockDevice.Status.VGUuid || + candidate.PartUUID != blockDevice.Status.PartUUID || + candidate.LVMVolumeGroupName != blockDevice.Status.LVMVolumeGroupName || + candidate.ActualVGNameOnTheNode != blockDevice.Status.ActualVGNameOnTheNode || + candidate.Wwn != blockDevice.Status.Wwn || + candidate.Serial != blockDevice.Status.Serial || + candidate.Path != blockDevice.Status.Path || + candidate.Size.Value() != blockDevice.Status.Size.Value() || + candidate.Rota != blockDevice.Status.Rota || + candidate.Model != blockDevice.Status.Model || + candidate.HotPlug != blockDevice.Status.HotPlug || + candidate.Type != blockDevice.Status.Type || + candidate.FSType != blockDevice.Status.FsType || + candidate.MachineID != blockDevice.Status.MachineID || + !reflect.DeepEqual(configureBlockDeviceLabels(blockDevice), blockDevice.Labels) +} + +func getSerialForMultipathDevice(candidate internal.BlockDeviceCandidate, devices []internal.Device) (string, error) { + parentDevice := getParentDevice(candidate.PkName, devices) + if parentDevice.Name == "" { + err := fmt.Errorf("parent device %s not found for multipath device: %s in device list", candidate.PkName, candidate.Path) + return "", err + } + + if parentDevice.FSType != internal.MultiPathMemberFSType { + err := fmt.Errorf("parent device %s for multipath device %s is not a multipath member (fstype != %s)", parentDevice.Name, candidate.Path, internal.MultiPathMemberFSType) + return "", err + } + + if parentDevice.Serial == "" { + err := fmt.Errorf("serial number is empty for parent device %s", parentDevice.Name) + return "", err + } + + return parentDevice.Serial, nil +} + +func getParentDevice(pkName string, devices []internal.Device) internal.Device { + for _, device := range devices { + if device.Name == pkName { + return device + } + } + return internal.Device{} +} + +func shouldDeleteBlockDevice(bd v1alpha1.BlockDevice, actualCandidates map[string]struct{}, nodeName string) bool { + if bd.Status.NodeName == nodeName && + bd.Status.Consumable && + isBlockDeviceDeprecated(bd.Name, actualCandidates) { + return true + } + + return false +} + +func isBlockDeviceDeprecated(blockDevice string, actualCandidates map[string]struct{}) bool { + _, ok 
:= actualCandidates[blockDevice] + return !ok +} + +func hasValidSize(size resource.Quantity) (bool, error) { + limitSize, err := resource.ParseQuantity(internal.BlockDeviceValidSize) + if err != nil { + return false, err + } + + return size.Value() >= limitSize.Value(), nil +} + +func isParent(kName string, pkNames map[string]struct{}) bool { + _, ok := pkNames[kName] + return ok +} + +func hasValidType(deviceType string) bool { + for _, invalidType := range internal.InvalidDeviceTypes { + if deviceType == invalidType { + return false + } + } + + return true +} + +func hasValidFSType(fsType string) bool { + if fsType == "" { + return true + } + + for _, allowedType := range internal.AllowedFSTypes { + if fsType == allowedType { + return true + } + } + + return false +} + +func checkConsumable(device internal.Device) bool { + if device.MountPoint != "" { + return false + } + + if device.FSType != "" { + return false + } + + if device.HotPlug { + return false + } + + return true +} + +func checkTag(tags string) (bool, string) { + if !strings.Contains(tags, internal.LVMTags[0]) { + return false, "" + } + + splitTags := strings.Split(tags, ",") + for _, tag := range splitTags { + if strings.HasPrefix(tag, "storage.deckhouse.io/lvmVolumeGroupName") { + kv := strings.Split(tag, "=") + return true, kv[1] + } + } + + return true, "" +} + +func createUniqDeviceName(can internal.BlockDeviceCandidate) string { + temp := fmt.Sprintf("%s%s%s%s%s", can.NodeName, can.Wwn, can.Model, can.Serial, can.PartUUID) + s := fmt.Sprintf("dev-%x", sha1.Sum([]byte(temp))) + return s +} + +func readSerialBlockDevice(deviceName string, isMdRaid bool) (string, error) { + if len(deviceName) < 6 { + return "", fmt.Errorf("device name is too short") + } + strPath := fmt.Sprintf("/sys/block/%s/serial", deviceName[5:]) + + if isMdRaid { + strPath = fmt.Sprintf("/sys/block/%s/md/uuid", deviceName[5:]) + } + + serial, err := os.ReadFile(strPath) + if err != nil { + return "", fmt.Errorf("unable to read serial from block device: %s, error: %s", deviceName, err) + } + if len(serial) == 0 { + return "", fmt.Errorf("serial is empty") + } + return string(serial), nil +} + +func configureBlockDeviceLabels(blockDevice v1alpha1.BlockDevice) map[string]string { + var lbls map[string]string + if blockDevice.Labels == nil { + lbls = make(map[string]string, 16) + } else { + lbls = make(map[string]string, len(blockDevice.Labels)) + } + + for key, value := range blockDevice.Labels { + lbls[key] = value + } + + slug.Lowercase = false + lbls[internal.MetadataNameLabelKey] = slug.Make(blockDevice.ObjectMeta.Name) + lbls[internal.HostNameLabelKey] = slug.Make(blockDevice.Status.NodeName) + lbls[internal.BlockDeviceTypeLabelKey] = slug.Make(blockDevice.Status.Type) + lbls[internal.BlockDeviceFSTypeLabelKey] = slug.Make(blockDevice.Status.FsType) + lbls[internal.BlockDevicePVUUIDLabelKey] = blockDevice.Status.PVUuid + lbls[internal.BlockDeviceVGUUIDLabelKey] = blockDevice.Status.VGUuid + lbls[internal.BlockDevicePartUUIDLabelKey] = blockDevice.Status.PartUUID + lbls[internal.BlockDeviceLVMVolumeGroupNameLabelKey] = slug.Make(blockDevice.Status.LVMVolumeGroupName) + lbls[internal.BlockDeviceActualVGNameLabelKey] = slug.Make(blockDevice.Status.ActualVGNameOnTheNode) + lbls[internal.BlockDeviceWWNLabelKey] = slug.Make(blockDevice.Status.Wwn) + lbls[internal.BlockDeviceSerialLabelKey] = slug.Make(blockDevice.Status.Serial) + lbls[internal.BlockDeviceSizeLabelKey] = blockDevice.Status.Size.String() + lbls[internal.BlockDeviceModelLabelKey] = 
slug.Make(blockDevice.Status.Model) + lbls[internal.BlockDeviceRotaLabelKey] = strconv.FormatBool(blockDevice.Status.Rota) + lbls[internal.BlockDeviceHotPlugLabelKey] = strconv.FormatBool(blockDevice.Status.HotPlug) + lbls[internal.BlockDeviceMachineIDLabelKey] = slug.Make(blockDevice.Status.MachineID) + + return lbls +} diff --git a/images/agent/src/pkg/controller/controller_reconcile_test.go b/images/agent/src/pkg/controller/bd/discoverer_suite_test.go similarity index 71% rename from images/agent/src/pkg/controller/controller_reconcile_test.go rename to images/agent/src/pkg/controller/bd/discoverer_suite_test.go index 603cc6ed..77edac5c 100644 --- a/images/agent/src/pkg/controller/controller_reconcile_test.go +++ b/images/agent/src/pkg/controller/bd/discoverer_suite_test.go @@ -14,10 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ -package controller_test +package bd import ( "context" + "testing" "github.com/deckhouse/sds-node-configurator/api/v1alpha1" . "github.com/onsi/ginkgo/v2" @@ -26,43 +27,45 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "agent/internal" - "agent/pkg/controller" + "agent/pkg/cache" + "agent/pkg/logger" "agent/pkg/monitoring" + "agent/pkg/test_utils" ) var _ = Describe("Storage Controller", func() { - - var ( - ctx = context.Background() - testMetrics = monitoring.GetMetrics("") - deviceName = "/dev/sda" - candidate = internal.BlockDeviceCandidate{ - NodeName: "test-node", - Consumable: true, - PVUuid: "123", - VGUuid: "123", - LVMVolumeGroupName: "testLvm", - ActualVGNameOnTheNode: "testVG", - Wwn: "WW12345678", - Serial: "test", - Path: deviceName, - Size: resource.Quantity{}, - Rota: false, - Model: "very good-model", - Name: "/dev/sda", - HotPlug: false, - KName: "/dev/sda", - PkName: "/dev/sda14", - Type: "disk", - FSType: "", - MachineID: "1234", - } - ) - - cl := NewFakeClient() + ctx := context.Background() + testMetrics := monitoring.GetMetrics("") + deviceName := "/dev/sda" + candidate := internal.BlockDeviceCandidate{ + NodeName: "test-node", + Consumable: true, + PVUuid: "123", + VGUuid: "123", + LVMVolumeGroupName: "testLvm", + ActualVGNameOnTheNode: "testVG", + Wwn: "WW12345678", + Serial: "test", + Path: deviceName, + Size: resource.Quantity{}, + Rota: false, + Model: "very good-model", + Name: "/dev/sda", + HotPlug: false, + KName: "/dev/sda", + PkName: "/dev/sda14", + Type: "disk", + FSType: "", + MachineID: "1234", + } + cl := test_utils.NewFakeClient() + log, _ := logger.NewLogger("1") + sdsCache := cache.New() + + r := NewDiscoverer(cl, *log, testMetrics, sdsCache, Options{}) It("CreateAPIBlockDevice", func() { - blockDevice, err := controller.CreateAPIBlockDevice(ctx, cl, testMetrics, candidate) + blockDevice, err := r.createAPIBlockDevice(ctx, candidate) Expect(err).NotTo(HaveOccurred()) Expect(blockDevice.Status.NodeName).To(Equal(candidate.NodeName)) Expect(blockDevice.Status.Consumable).To(Equal(candidate.Consumable)) @@ -82,7 +85,7 @@ var _ = Describe("Storage Controller", func() { }) It("GetAPIBlockDevices", func() { - listDevice, err := controller.GetAPIBlockDevices(ctx, cl, testMetrics, nil) + listDevice, err := r.getAPIBlockDevices(ctx, nil) Expect(err).NotTo(HaveOccurred()) Expect(listDevice).NotTo(BeNil()) Expect(len(listDevice)).To(Equal(1)) @@ -115,7 +118,7 @@ var _ = Describe("Storage Controller", func() { MachineID: "1234", } - resources, err := controller.GetAPIBlockDevices(ctx, cl, testMetrics, nil) + resources, err := r.getAPIBlockDevices(ctx, nil) 
Expect(err).NotTo(HaveOccurred()) Expect(resources).NotTo(BeNil()) Expect(len(resources)).To(Equal(1)) @@ -124,10 +127,10 @@ var _ = Describe("Storage Controller", func() { Expect(oldResource).NotTo(BeNil()) Expect(oldResource.Status.NodeName).To(Equal(candidate.NodeName)) - err = controller.UpdateAPIBlockDevice(ctx, cl, testMetrics, oldResource, newCandidate) + err = r.updateAPIBlockDevice(ctx, oldResource, newCandidate) Expect(err).NotTo(HaveOccurred()) - resources, err = controller.GetAPIBlockDevices(ctx, cl, testMetrics, nil) + resources, err = r.getAPIBlockDevices(ctx, nil) Expect(err).NotTo(HaveOccurred()) Expect(resources).NotTo(BeNil()) Expect(len(resources)).To(Equal(1)) @@ -140,17 +143,22 @@ var _ = Describe("Storage Controller", func() { }) It("DeleteAPIBlockDevice", func() { - err := controller.DeleteAPIBlockDevice(ctx, cl, testMetrics, &v1alpha1.BlockDevice{ + err := r.deleteAPIBlockDevice(ctx, &v1alpha1.BlockDevice{ ObjectMeta: metav1.ObjectMeta{ Name: deviceName, }, }) Expect(err).NotTo(HaveOccurred()) - devices, err := controller.GetAPIBlockDevices(context.Background(), cl, testMetrics, nil) + devices, err := r.getAPIBlockDevices(context.Background(), nil) Expect(err).NotTo(HaveOccurred()) for name := range devices { Expect(name).NotTo(Equal(deviceName)) } }) }) + +func TestController(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Controller Suite") +} diff --git a/images/agent/src/pkg/controller/block_device_test.go b/images/agent/src/pkg/controller/bd/discoverer_test.go similarity index 61% rename from images/agent/src/pkg/controller/block_device_test.go rename to images/agent/src/pkg/controller/bd/discoverer_test.go index 497836b3..fd3fa875 100644 --- a/images/agent/src/pkg/controller/block_device_test.go +++ b/images/agent/src/pkg/controller/bd/discoverer_test.go @@ -14,11 +14,12 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package controller +package bd import ( "bytes" "context" + _ "embed" "fmt" "strconv" "testing" @@ -30,23 +31,29 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "agent/config" "agent/internal" "agent/pkg/cache" "agent/pkg/logger" "agent/pkg/monitoring" + "agent/pkg/test_utils" "agent/pkg/utils" ) +//go:embed testdata/lsblk_output.json +var testLsblkOutput string + func TestBlockDeviceCtrl(t *testing.T) { ctx := context.Background() - cl := NewFakeClient() - metrics := monitoring.GetMetrics("") - log, _ := logger.NewLogger("1") - cfg := config.Options{ + opts := Options{ NodeName: "test-node", MachineID: "test-id", } + cl := test_utils.NewFakeClient() + metrics := monitoring.GetMetrics(opts.NodeName) + log, _ := logger.NewLogger("1") + sdsCache := cache.New() + + r := NewDiscoverer(cl, *log, metrics, sdsCache, opts) t.Run("GetAPIBlockDevices", func(t *testing.T) { t.Run("bds_exist_match_labels_and_expressions_return_bds", func(t *testing.T) { @@ -120,7 +127,7 @@ func TestBlockDeviceCtrl(t *testing.T) { }, } - actualBd, err := GetAPIBlockDevices(ctx, cl, metrics, lvg.Spec.BlockDeviceSelector) + actualBd, err := r.getAPIBlockDevices(ctx, lvg.Spec.BlockDeviceSelector) if assert.NoError(t, err) { assert.Equal(t, 2, len(actualBd)) @@ -195,7 +202,7 @@ func TestBlockDeviceCtrl(t *testing.T) { }, } - actualBd, err := GetAPIBlockDevices(ctx, cl, metrics, lvg.Spec.BlockDeviceSelector) + actualBd, err := r.getAPIBlockDevices(ctx, lvg.Spec.BlockDeviceSelector) if assert.NoError(t, err) { assert.Equal(t, 2, len(actualBd)) @@ -276,7 +283,7 @@ func TestBlockDeviceCtrl(t *testing.T) { }, } - actualBd, err := GetAPIBlockDevices(ctx, cl, metrics, lvg.Spec.BlockDeviceSelector) + actualBd, err := r.getAPIBlockDevices(ctx, lvg.Spec.BlockDeviceSelector) if assert.NoError(t, err) { assert.Equal(t, 2, len(actualBd)) _, ok := actualBd[name1] @@ -293,19 +300,19 @@ func TestBlockDeviceCtrl(t *testing.T) { t.Run("returns_true", func(t *testing.T) { bd := v1alpha1.BlockDevice{ Status: v1alpha1.BlockDeviceStatus{ - NodeName: cfg.NodeName, + NodeName: opts.NodeName, Consumable: true, }, } actual := map[string]struct{}{} - assert.True(t, shouldDeleteBlockDevice(bd, actual, cfg.NodeName)) + assert.True(t, shouldDeleteBlockDevice(bd, actual, opts.NodeName)) }) t.Run("returns_false_cause_of_dif_node", func(t *testing.T) { bd := v1alpha1.BlockDevice{ Status: v1alpha1.BlockDeviceStatus{ - NodeName: cfg.NodeName, + NodeName: opts.NodeName, Consumable: true, }, } @@ -317,13 +324,13 @@ func TestBlockDeviceCtrl(t *testing.T) { t.Run("returns_false_cause_of_not_consumable", func(t *testing.T) { bd := v1alpha1.BlockDevice{ Status: v1alpha1.BlockDeviceStatus{ - NodeName: cfg.NodeName, + NodeName: opts.NodeName, Consumable: false, }, } actual := map[string]struct{}{} - assert.False(t, shouldDeleteBlockDevice(bd, actual, cfg.NodeName)) + assert.False(t, shouldDeleteBlockDevice(bd, actual, opts.NodeName)) }) t.Run("returns_false_cause_of_not_deprecated", func(t *testing.T) { @@ -333,7 +340,7 @@ func TestBlockDeviceCtrl(t *testing.T) { Name: name, }, Status: v1alpha1.BlockDeviceStatus{ - NodeName: cfg.NodeName, + NodeName: opts.NodeName, Consumable: true, }, } @@ -341,7 +348,7 @@ func TestBlockDeviceCtrl(t *testing.T) { name: {}, } - assert.False(t, shouldDeleteBlockDevice(bd, actual, cfg.NodeName)) + assert.False(t, shouldDeleteBlockDevice(bd, actual, opts.NodeName)) }) }) @@ -353,7 +360,7 @@ func TestBlockDeviceCtrl(t *testing.T) { candidates := 
[]internal.BlockDeviceCandidate{ { - NodeName: cfg.NodeName, + NodeName: opts.NodeName, Consumable: false, PVUuid: "142412421", VGUuid: "123123123", @@ -383,7 +390,7 @@ func TestBlockDeviceCtrl(t *testing.T) { }, Status: v1alpha1.BlockDeviceStatus{ Consumable: true, - NodeName: cfg.NodeName, + NodeName: opts.NodeName, }, }, } @@ -412,7 +419,7 @@ func TestBlockDeviceCtrl(t *testing.T) { assert.Equal(t, bd.Name, createdBd.Name) } - RemoveDeprecatedAPIDevices(ctx, cl, *log, monitoring.GetMetrics(cfg.NodeName), candidates, bds, cfg.NodeName) + r.removeDeprecatedAPIDevices(ctx, candidates, bds) _, ok := bds[badName] assert.False(t, ok) @@ -453,13 +460,13 @@ func TestBlockDeviceCtrl(t *testing.T) { sdsCache := cache.New() sdsCache.StoreDevices(devices, bytes.Buffer{}) - candidates := GetBlockDeviceCandidates(*log, cfg, sdsCache) + candidates := r.getBlockDeviceCandidates() assert.Equal(t, 3, len(candidates)) for i := range candidates { assert.Equal(t, devices[i].Name, candidates[i].Path) - assert.Equal(t, cfg.MachineID, candidates[i].MachineID) - assert.Equal(t, cfg.NodeName, candidates[i].NodeName) + assert.Equal(t, opts.MachineID, candidates[i].MachineID) + assert.Equal(t, opts.NodeName, candidates[i].NodeName) } }) @@ -481,7 +488,7 @@ func TestBlockDeviceCtrl(t *testing.T) { Rota: false, } - shouldBeTrue := CheckConsumable(goodDevice) + shouldBeTrue := checkConsumable(goodDevice) assert.True(t, shouldBeTrue) }) @@ -505,7 +512,7 @@ func TestBlockDeviceCtrl(t *testing.T) { }} for _, badDevice := range badDevices.BlockDevices { - shouldBeFalse := CheckConsumable(badDevice) + shouldBeFalse := checkConsumable(badDevice) assert.False(t, shouldBeFalse) } }) @@ -521,7 +528,7 @@ func TestBlockDeviceCtrl(t *testing.T) { Model: "HARD-DRIVE", } - deviceName := CreateUniqDeviceName(can) + deviceName := createUniqDeviceName(can) assert.Equal(t, "dev-", deviceName[0:4], "device name does not start with dev-") assert.Equal(t, len(deviceName[4:]), 40, "device name does not contains sha1 sum") }) @@ -531,7 +538,7 @@ func TestBlockDeviceCtrl(t *testing.T) { expectedName := "testName" tags := fmt.Sprintf("storage.deckhouse.io/enabled=true,storage.deckhouse.io/lvmVolumeGroupName=%s", expectedName) - shouldBeTrue, actualName := CheckTag(tags) + shouldBeTrue, actualName := checkTag(tags) if assert.True(t, shouldBeTrue) { assert.Equal(t, expectedName, actualName) } @@ -540,7 +547,7 @@ func TestBlockDeviceCtrl(t *testing.T) { t.Run("Haven't tag_Returns false and empty", func(t *testing.T) { tags := "someWeirdTags=oMGwtFIsThis" - shouldBeFalse, actualName := CheckTag(tags) + shouldBeFalse, actualName := checkTag(tags) if assert.False(t, shouldBeFalse) { assert.Equal(t, "", actualName) } @@ -609,7 +616,7 @@ func TestBlockDeviceCtrl(t *testing.T) { "some-custom-label2": "v", } - assert.Equal(t, expectedLabels, ConfigureBlockDeviceLabels(blockDevice)) + assert.Equal(t, expectedLabels, configureBlockDeviceLabels(blockDevice)) }) t.Run("hasBlockDeviceDiff", func(t *testing.T) { @@ -679,7 +686,7 @@ func TestBlockDeviceCtrl(t *testing.T) { MachineID: "testMACHINE", }, } - labels := ConfigureBlockDeviceLabels(blockDevice) + labels := configureBlockDeviceLabels(blockDevice) blockDevice.Labels = labels expected := []bool{false, true} @@ -695,13 +702,13 @@ func TestBlockDeviceCtrl(t *testing.T) { if assert.NoError(t, err) { assert.Equal(t, 31, len(devices)) } - filteredDevices, err := FilterDevices(*log, devices) + filteredDevices, err := r.filterDevices(devices) for i, device := range filteredDevices { println("Filtered device: 
", device.Name) candidate := internal.BlockDeviceCandidate{ NodeName: "test-node", - Consumable: CheckConsumable(device), + Consumable: checkConsumable(device), Wwn: device.Wwn, Serial: device.Serial, Path: device.Name, @@ -725,27 +732,27 @@ func TestBlockDeviceCtrl(t *testing.T) { case 2: assert.Equal(t, "/dev/nvme4n1", device.Name) assert.True(t, candidate.Consumable) - candidateName := CreateCandidateName(*log, candidate, devices) + candidateName := r.createCandidateName(candidate, devices) assert.Equal(t, "dev-794d93d177d16bc9a85e2dd2ccbdc7325c287374", candidateName, "device name generated incorrectly") case 3: assert.Equal(t, "/dev/nvme5n1", device.Name) assert.True(t, candidate.Consumable) - candidateName := CreateCandidateName(*log, candidate, devices) + candidateName := r.createCandidateName(candidate, devices) assert.Equal(t, "dev-3306e773ab3cde6d519ce8d7c3686bf17a124dcb", candidateName, "device name generated incorrectly") case 4: assert.Equal(t, "/dev/sda4", device.Name) assert.False(t, candidate.Consumable) - candidateName := CreateCandidateName(*log, candidate, devices) + candidateName := r.createCandidateName(candidate, devices) assert.Equal(t, "dev-377bc6adf33d84eb5932f5c89798bb6c5949ae2d", candidateName, "device name generated incorrectly") case 5: assert.Equal(t, "/dev/vdc1", device.Name) assert.True(t, candidate.Consumable) - candidateName := CreateCandidateName(*log, candidate, devices) + candidateName := r.createCandidateName(candidate, devices) assert.Equal(t, "dev-a9d768213aaead8b42465ec859189de8779f96b7", candidateName, "device name generated incorrectly") case 6: assert.Equal(t, "/dev/mapper/mpatha", device.Name) assert.True(t, candidate.Consumable) - candidateName := CreateCandidateName(*log, candidate, devices) + candidateName := r.createCandidateName(candidate, devices) assert.Equal(t, "dev-98ca88ddaaddec43b1c4894756f4856244985511", candidateName, "device name generated incorrectly") } } @@ -755,418 +762,3 @@ func TestBlockDeviceCtrl(t *testing.T) { } }) } - -var ( - testLsblkOutput = ` - { - "blockdevices": [ - { - "name": "/dev/md0", - "mountpoint": "/boot", - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": "1022M", - "fstype": "ext3", - "type": "raid1", - "wwn": null, - "kname": "/dev/md0", - "pkname": "/dev/nvme3n1p2" - },{ - "name": "/dev/md1", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": "892.9G", - "fstype": "LVM2_member", - "type": "raid1", - "wwn": null, - "kname": "/dev/md1", - "pkname": "/dev/nvme3n1p3" - },{ - "name": "/dev/mapper/vg0-root", - "mountpoint": "/", - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": "150G", - "fstype": "ext4", - "type": "lvm", - "wwn": null, - "kname": "/dev/dm-0", - "pkname": "/dev/md1" - },{ - "name": "/dev/md127", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": "3.3T", - "fstype": "LVM2_member", - "type": "raid1", - "wwn": null, - "kname": "/dev/md127", - "pkname": null - },{ - "name": "/dev/mapper/vg0-pvc--nnnn--nnnnn--nnnn--nnnn--nnnnn_00000", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": "1G", - "fstype": "drbd", - "type": "lvm", - "wwn": null, - "kname": "/dev/dm-1", - "pkname": "/dev/md127" - },{ - "name": "/dev/nvme1n1", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "Micron", - "serial": "000000BBBBB", - "size": "1.7T", - "fstype": 
"ceph_bluestore", - "type": "disk", - "wwn": "eui.000000000000000100aaaaa", - "kname": "/dev/nvme1n1", - "pkname": null - },{ - "name": "/dev/nvme4n1", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "Micron", - "serial": "000000AAAA", - "size": "1.7T", - "fstype": null, - "type": "disk", - "wwn": "eui.000000000000000100aaaab", - "kname": "/dev/nvme4n1", - "pkname": null - },{ - "name": "/dev/nvme5n1", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "Micron", - "serial": "000000AAAAA", - "size": "1.7T", - "fstype": null, - "type": "disk", - "wwn": "eui.000000000000000100aaaaac", - "kname": "/dev/nvme5n1", - "pkname": null - },{ - "name": "/dev/nvme0n1", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "Micron", - "serial": "000000AAAAAB", - "size": "1.7T", - "fstype": "ceph_bluestore", - "type": "disk", - "wwn": "eui.000000000000000100aaaaab", - "kname": "/dev/nvme0n1", - "pkname": null - },{ - "name": "/dev/nvme2n1", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "SAMSUNG", - "serial": "000000AAAAAC", - "size": "894.3G", - "fstype": null, - "type": "disk", - "wwn": "eui.000000000000000100aaaaad", - "kname": "/dev/nvme2n1", - "pkname": null - },{ - "name": "/dev/nvme3n1", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "SAMSUNG", - "serial": "000000AAAAAD", - "size": "894.3G", - "fstype": null, - "type": "disk", - "wwn": "eui.000000000000000100aaaaad", - "kname": "/dev/nvme3n1", - "pkname": null - },{ - "name": "/dev/nvme2n1p1", - "mountpoint": null, - "partuuid": "11111111-e2bb-47fb-8cc1-xxxxxxx", - "hotplug": false, - "model": null, - "serial": null, - "size": "256M", - "fstype": "vfat", - "type": "part", - "wwn": "eui.000000000000000100aaaaae", - "kname": "/dev/nvme2n1p1", - "pkname": "/dev/nvme2n1" - },{ - "name": "/dev/nvme2n1p2", - "mountpoint": null, - "partuuid": "11111111-d3d4-416a-ac76-xxxxxxx", - "hotplug": false, - "model": null, - "serial": null, - "size": "1G", - "fstype": "linux_raid_member", - "type": "part", - "wwn": "eui.000000000000000100aaaaaf", - "kname": "/dev/nvme2n1p2", - "pkname": "/dev/nvme2n1" - },{ - "name": "/dev/nvme2n1p3", - "mountpoint": null, - "partuuid": "11111111-3677-4eb2-9491-xxxxxxx", - "hotplug": false, - "model": null, - "serial": null, - "size": "893G", - "fstype": "linux_raid_member", - "type": "part", - "wwn": "eui.000000000000000100aaaaag", - "kname": "/dev/nvme2n1p3", - "pkname": "/dev/nvme2n1" - },{ - "name": "/dev/nvme3n1p1", - "mountpoint": "/boot/efi", - "partuuid": "11111111-2965-47d3-8983-xxxxxxx", - "hotplug": false, - "model": null, - "serial": null, - "size": "256M", - "fstype": "vfat", - "type": "part", - "wwn": "eui.000000000000000100aaaaah", - "kname": "/dev/nvme3n1p1", - "pkname": "/dev/nvme3n1" - },{ - "name": "/dev/nvme3n1p2", - "mountpoint": null, - "partuuid": "11111111-7fa2-4318-91c4-xxxxxxx", - "hotplug": false, - "model": null, - "serial": null, - "size": "1G", - "fstype": "linux_raid_member", - "type": "part", - "wwn": "eui.000000000000000100aaaaabs", - "kname": "/dev/nvme3n1p2", - "pkname": "/dev/nvme3n1" - },{ - "name": "/dev/nvme3n1p3", - "mountpoint": null, - "partuuid": "11111111-734d-45f4-b60e-xxxxxxx", - "hotplug": false, - "model": null, - "serial": null, - "size": "893G", - "fstype": "linux_raid_member", - "type": "part", - "wwn": "eui.000000000000000100aaaaaccx", - "kname": "/dev/nvme3n1p3", - "pkname": "/dev/nvme3n1" - },{ - "name": "/dev/sda", - "mountpoint": null, - "partuuid": null, - 
"hotplug": false, - "model": "Virtual_Disk", - "serial": "6006", - "size": "50G", - "fstype": null, - "type": "disk", - "wwn": "0x6006", - "kname": "/dev/sda", - "pkname": null - },{ - "name": "/dev/sda1", - "mountpoint": "/data", - "partuuid": "11111-01", - "hotplug": false, - "model": null, - "serial": null, - "size": "50G", - "fstype": "ext4", - "type": "part", - "wwn": "0x6006", - "kname": "/dev/sda1", - "pkname": "/dev/sda" - },{ - "name": "/dev/sda", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "INTEL", - "serial": "PHYS729000AAAA", - "size": "447.1G", - "fstype": null, - "type": "disk", - "wwn": "0x5555555", - "kname": "/dev/sda", - "pkname": null - },{ - "name": "/dev/sda1", - "mountpoint": "/boot/efi", - "partuuid": "xxxxx-6a34-4402-a253-nnnnn", - "hotplug": false, - "model": null, - "serial": null, - "size": "1G", - "fstype": "vfat", - "type": "part", - "wwn": "0x5555555", - "kname": "/dev/sda1", - "pkname": "/dev/sda" - },{ - "name": "/dev/sda2", - "mountpoint": null, - "partuuid": "xxxxx-99b4-42c4-9dc4-nnnnnnn", - "hotplug": false, - "model": null, - "serial": null, - "size": "1G", - "fstype": "linux_raid_member", - "type": "part", - "wwn": "0x5555555", - "kname": "/dev/sda2", - "pkname": "/dev/sda" - },{ - "name": "/dev/sda3", - "mountpoint": null, - "partuuid": "xxxxx-f3ef-4b4a-86f8-nnnnnn", - "hotplug": false, - "model": null, - "serial": null, - "size": "55G", - "fstype": "linux_raid_member", - "type": "part", - "wwn": "0x5555555", - "kname": "/dev/sda3", - "pkname": "/dev/sda" - },{ - "name": "/dev/sda4", - "mountpoint": null, - "partuuid": "xxxxx-9f91-41c5-9616-nnnnnn", - "hotplug": false, - "model": null, - "serial": null, - "size": "390.1G", - "fstype": "LVM2_member", - "type": "part", - "wwn": "0x55cddd", - "kname": "/dev/sda4", - "pkname": "/dev/sda" - },{ - "name": "/dev/mapper/data--linstor-pvc--xxxx--8997--4630--a728--nnnnnn_00000", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": "30G", - "fstype": null, - "type": "lvm", - "wwn": null, - "kname": "/dev/dm-18", - "pkname": "/dev/sda4" - },{ - "name": "/dev/drbd1028", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": "50G", - "fstype": null, - "type": "disk", - "wwn": null, - "kname": "/dev/drbd1028", - "pkname": "/dev/dm-10" - },{ - "name": "/dev/vdc", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": null, - "serial": "fhmnscgfsllbsi2u5o8v", - "size": "20G", - "fstype": null, - "type": "disk", - "wwn": null, - "kname": "/dev/vdc", - "pkname": null - },{ - "name": "/dev/vdc1", - "mountpoint": null, - "partuuid": "13dcb00e-01", - "hotplug": false, - "model": null, - "serial": null, - "size": "20G", - "fstype": null, - "type": "part", - "wwn": null, - "kname": "/dev/vdc1", - "pkname": "/dev/vdc" - },{ - "name": "/dev/mapper/mpatha", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": null, - "serial": null, - "size": 3650722201600, - "fstype": null, - "type": "mpath", - "wwn": null, - "kname": "/dev/dm-6", - "pkname": "/dev/sdf", - "rota": false - },{ - "name": "/dev/sdf", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": "test-model", - "serial": "22222222xxxxx", - "size": 3650722201600, - "fstype": "mpath_member", - "type": "disk", - "wwn": "2222xxxxxx", - "kname": "/dev/sdf", - "pkname": null, - "rota": false - },{ - "name": "/dev/sdh", - "mountpoint": null, - "partuuid": null, - "hotplug": false, - "model": 
"test-model", - "serial": "22222222xxxxx", - "size": 3650722201600, - "fstype": "mpath_member", - "type": "disk", - "wwn": "2222xxxxxx", - "kname": "/dev/sdh", - "pkname": null, - "rota": false - } - ] - }` -) diff --git a/images/agent/src/pkg/controller/bd/doc.go b/images/agent/src/pkg/controller/bd/doc.go new file mode 100644 index 00000000..6c5ac0e2 --- /dev/null +++ b/images/agent/src/pkg/controller/bd/doc.go @@ -0,0 +1,7 @@ +package bd + +/* +BlockDevice reconciler +TODO + +*/ diff --git a/images/agent/src/pkg/controller/bd/testdata/lsblk_output.json b/images/agent/src/pkg/controller/bd/testdata/lsblk_output.json new file mode 100644 index 00000000..e7bce5b0 --- /dev/null +++ b/images/agent/src/pkg/controller/bd/testdata/lsblk_output.json @@ -0,0 +1,411 @@ +{ + "blockdevices": [ + { + "name": "/dev/md0", + "mountpoint": "/boot", + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": "1022M", + "fstype": "ext3", + "type": "raid1", + "wwn": null, + "kname": "/dev/md0", + "pkname": "/dev/nvme3n1p2" + },{ + "name": "/dev/md1", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": "892.9G", + "fstype": "LVM2_member", + "type": "raid1", + "wwn": null, + "kname": "/dev/md1", + "pkname": "/dev/nvme3n1p3" + },{ + "name": "/dev/mapper/vg0-root", + "mountpoint": "/", + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": "150G", + "fstype": "ext4", + "type": "lvm", + "wwn": null, + "kname": "/dev/dm-0", + "pkname": "/dev/md1" + },{ + "name": "/dev/md127", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": "3.3T", + "fstype": "LVM2_member", + "type": "raid1", + "wwn": null, + "kname": "/dev/md127", + "pkname": null + },{ + "name": "/dev/mapper/vg0-pvc--nnnn--nnnnn--nnnn--nnnn--nnnnn_00000", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": "1G", + "fstype": "drbd", + "type": "lvm", + "wwn": null, + "kname": "/dev/dm-1", + "pkname": "/dev/md127" + },{ + "name": "/dev/nvme1n1", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "Micron", + "serial": "000000BBBBB", + "size": "1.7T", + "fstype": "ceph_bluestore", + "type": "disk", + "wwn": "eui.000000000000000100aaaaa", + "kname": "/dev/nvme1n1", + "pkname": null + },{ + "name": "/dev/nvme4n1", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "Micron", + "serial": "000000AAAA", + "size": "1.7T", + "fstype": null, + "type": "disk", + "wwn": "eui.000000000000000100aaaab", + "kname": "/dev/nvme4n1", + "pkname": null + },{ + "name": "/dev/nvme5n1", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "Micron", + "serial": "000000AAAAA", + "size": "1.7T", + "fstype": null, + "type": "disk", + "wwn": "eui.000000000000000100aaaaac", + "kname": "/dev/nvme5n1", + "pkname": null + },{ + "name": "/dev/nvme0n1", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "Micron", + "serial": "000000AAAAAB", + "size": "1.7T", + "fstype": "ceph_bluestore", + "type": "disk", + "wwn": "eui.000000000000000100aaaaab", + "kname": "/dev/nvme0n1", + "pkname": null + },{ + "name": "/dev/nvme2n1", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "SAMSUNG", + "serial": "000000AAAAAC", + "size": "894.3G", + "fstype": null, + "type": "disk", + "wwn": "eui.000000000000000100aaaaad", + "kname": "/dev/nvme2n1", + "pkname": null + },{ + "name": 
"/dev/nvme3n1", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "SAMSUNG", + "serial": "000000AAAAAD", + "size": "894.3G", + "fstype": null, + "type": "disk", + "wwn": "eui.000000000000000100aaaaad", + "kname": "/dev/nvme3n1", + "pkname": null + },{ + "name": "/dev/nvme2n1p1", + "mountpoint": null, + "partuuid": "11111111-e2bb-47fb-8cc1-xxxxxxx", + "hotplug": false, + "model": null, + "serial": null, + "size": "256M", + "fstype": "vfat", + "type": "part", + "wwn": "eui.000000000000000100aaaaae", + "kname": "/dev/nvme2n1p1", + "pkname": "/dev/nvme2n1" + },{ + "name": "/dev/nvme2n1p2", + "mountpoint": null, + "partuuid": "11111111-d3d4-416a-ac76-xxxxxxx", + "hotplug": false, + "model": null, + "serial": null, + "size": "1G", + "fstype": "linux_raid_member", + "type": "part", + "wwn": "eui.000000000000000100aaaaaf", + "kname": "/dev/nvme2n1p2", + "pkname": "/dev/nvme2n1" + },{ + "name": "/dev/nvme2n1p3", + "mountpoint": null, + "partuuid": "11111111-3677-4eb2-9491-xxxxxxx", + "hotplug": false, + "model": null, + "serial": null, + "size": "893G", + "fstype": "linux_raid_member", + "type": "part", + "wwn": "eui.000000000000000100aaaaag", + "kname": "/dev/nvme2n1p3", + "pkname": "/dev/nvme2n1" + },{ + "name": "/dev/nvme3n1p1", + "mountpoint": "/boot/efi", + "partuuid": "11111111-2965-47d3-8983-xxxxxxx", + "hotplug": false, + "model": null, + "serial": null, + "size": "256M", + "fstype": "vfat", + "type": "part", + "wwn": "eui.000000000000000100aaaaah", + "kname": "/dev/nvme3n1p1", + "pkname": "/dev/nvme3n1" + },{ + "name": "/dev/nvme3n1p2", + "mountpoint": null, + "partuuid": "11111111-7fa2-4318-91c4-xxxxxxx", + "hotplug": false, + "model": null, + "serial": null, + "size": "1G", + "fstype": "linux_raid_member", + "type": "part", + "wwn": "eui.000000000000000100aaaaabs", + "kname": "/dev/nvme3n1p2", + "pkname": "/dev/nvme3n1" + },{ + "name": "/dev/nvme3n1p3", + "mountpoint": null, + "partuuid": "11111111-734d-45f4-b60e-xxxxxxx", + "hotplug": false, + "model": null, + "serial": null, + "size": "893G", + "fstype": "linux_raid_member", + "type": "part", + "wwn": "eui.000000000000000100aaaaaccx", + "kname": "/dev/nvme3n1p3", + "pkname": "/dev/nvme3n1" + },{ + "name": "/dev/sda", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "Virtual_Disk", + "serial": "6006", + "size": "50G", + "fstype": null, + "type": "disk", + "wwn": "0x6006", + "kname": "/dev/sda", + "pkname": null + },{ + "name": "/dev/sda1", + "mountpoint": "/data", + "partuuid": "11111-01", + "hotplug": false, + "model": null, + "serial": null, + "size": "50G", + "fstype": "ext4", + "type": "part", + "wwn": "0x6006", + "kname": "/dev/sda1", + "pkname": "/dev/sda" + },{ + "name": "/dev/sda", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "INTEL", + "serial": "PHYS729000AAAA", + "size": "447.1G", + "fstype": null, + "type": "disk", + "wwn": "0x5555555", + "kname": "/dev/sda", + "pkname": null + },{ + "name": "/dev/sda1", + "mountpoint": "/boot/efi", + "partuuid": "xxxxx-6a34-4402-a253-nnnnn", + "hotplug": false, + "model": null, + "serial": null, + "size": "1G", + "fstype": "vfat", + "type": "part", + "wwn": "0x5555555", + "kname": "/dev/sda1", + "pkname": "/dev/sda" + },{ + "name": "/dev/sda2", + "mountpoint": null, + "partuuid": "xxxxx-99b4-42c4-9dc4-nnnnnnn", + "hotplug": false, + "model": null, + "serial": null, + "size": "1G", + "fstype": "linux_raid_member", + "type": "part", + "wwn": "0x5555555", + "kname": "/dev/sda2", + "pkname": "/dev/sda" + },{ + "name": 
"/dev/sda3", + "mountpoint": null, + "partuuid": "xxxxx-f3ef-4b4a-86f8-nnnnnn", + "hotplug": false, + "model": null, + "serial": null, + "size": "55G", + "fstype": "linux_raid_member", + "type": "part", + "wwn": "0x5555555", + "kname": "/dev/sda3", + "pkname": "/dev/sda" + },{ + "name": "/dev/sda4", + "mountpoint": null, + "partuuid": "xxxxx-9f91-41c5-9616-nnnnnn", + "hotplug": false, + "model": null, + "serial": null, + "size": "390.1G", + "fstype": "LVM2_member", + "type": "part", + "wwn": "0x55cddd", + "kname": "/dev/sda4", + "pkname": "/dev/sda" + },{ + "name": "/dev/mapper/data--linstor-pvc--xxxx--8997--4630--a728--nnnnnn_00000", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": "30G", + "fstype": null, + "type": "lvm", + "wwn": null, + "kname": "/dev/dm-18", + "pkname": "/dev/sda4" + },{ + "name": "/dev/drbd1028", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": "50G", + "fstype": null, + "type": "disk", + "wwn": null, + "kname": "/dev/drbd1028", + "pkname": "/dev/dm-10" + },{ + "name": "/dev/vdc", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": null, + "serial": "fhmnscgfsllbsi2u5o8v", + "size": "20G", + "fstype": null, + "type": "disk", + "wwn": null, + "kname": "/dev/vdc", + "pkname": null + },{ + "name": "/dev/vdc1", + "mountpoint": null, + "partuuid": "13dcb00e-01", + "hotplug": false, + "model": null, + "serial": null, + "size": "20G", + "fstype": null, + "type": "part", + "wwn": null, + "kname": "/dev/vdc1", + "pkname": "/dev/vdc" + },{ + "name": "/dev/mapper/mpatha", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": null, + "serial": null, + "size": 3650722201600, + "fstype": null, + "type": "mpath", + "wwn": null, + "kname": "/dev/dm-6", + "pkname": "/dev/sdf", + "rota": false + },{ + "name": "/dev/sdf", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "test-model", + "serial": "22222222xxxxx", + "size": 3650722201600, + "fstype": "mpath_member", + "type": "disk", + "wwn": "2222xxxxxx", + "kname": "/dev/sdf", + "pkname": null, + "rota": false + },{ + "name": "/dev/sdh", + "mountpoint": null, + "partuuid": null, + "hotplug": false, + "model": "test-model", + "serial": "22222222xxxxx", + "size": 3650722201600, + "fstype": "mpath_member", + "type": "disk", + "wwn": "2222xxxxxx", + "kname": "/dev/sdh", + "pkname": null, + "rota": false + } + ] +} \ No newline at end of file diff --git a/images/agent/src/pkg/controller/block_device.go b/images/agent/src/pkg/controller/block_device.go deleted file mode 100644 index d86ebeb4..00000000 --- a/images/agent/src/pkg/controller/block_device.go +++ /dev/null @@ -1,757 +0,0 @@ -/* -Copyright 2023 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package controller - -import ( - "context" - "crypto/sha1" - "fmt" - "os" - "reflect" - "regexp" - "strconv" - "strings" - "time" - - "github.com/deckhouse/sds-node-configurator/api/v1alpha1" - "github.com/gosimple/slug" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - - "agent/config" - "agent/internal" - "agent/pkg/cache" - "agent/pkg/logger" - "agent/pkg/monitoring" - "agent/pkg/utils" -) - -const ( - BlockDeviceCtrlName = "block-device-controller" -) - -func RunBlockDeviceController( - mgr manager.Manager, - cfg config.Options, - log logger.Logger, - metrics monitoring.Metrics, - sdsCache *cache.Cache, -) (controller.Controller, error) { - cl := mgr.GetClient() - - c, err := controller.New(BlockDeviceCtrlName, mgr, controller.Options{ - Reconciler: reconcile.Func(func(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) { - log.Info("[RunBlockDeviceController] Reconciler starts BlockDevice resources reconciliation") - - shouldRequeue := BlockDeviceReconcile(ctx, cl, log, metrics, cfg, sdsCache) - if shouldRequeue { - log.Warning(fmt.Sprintf("[RunBlockDeviceController] Reconciler needs a retry in %f", cfg.BlockDeviceScanIntervalSec.Seconds())) - return reconcile.Result{ - RequeueAfter: cfg.BlockDeviceScanIntervalSec, - }, nil - } - log.Info("[RunBlockDeviceController] Reconciler successfully ended BlockDevice resources reconciliation") - return reconcile.Result{}, nil - }), - }) - - if err != nil { - log.Error(err, "[RunBlockDeviceController] unable to create controller") - return nil, err - } - - return c, err -} - -func BlockDeviceReconcile(ctx context.Context, cl client.Client, log logger.Logger, metrics monitoring.Metrics, cfg config.Options, sdsCache *cache.Cache) bool { - reconcileStart := time.Now() - - log.Info("[RunBlockDeviceController] START reconcile of block devices") - - candidates := GetBlockDeviceCandidates(log, cfg, sdsCache) - if len(candidates) == 0 { - log.Info("[RunBlockDeviceController] no block devices candidates found. 
Stop reconciliation") - return false - } - - apiBlockDevices, err := GetAPIBlockDevices(ctx, cl, metrics, nil) - if err != nil { - log.Error(err, "[RunBlockDeviceController] unable to GetAPIBlockDevices") - return true - } - - if len(apiBlockDevices) == 0 { - log.Debug("[RunBlockDeviceController] no BlockDevice resources were found") - } - - // create new API devices - for _, candidate := range candidates { - blockDevice, exist := apiBlockDevices[candidate.Name] - if exist { - if !hasBlockDeviceDiff(blockDevice, candidate) { - log.Debug(fmt.Sprintf(`[RunBlockDeviceController] no data to update for block device, name: "%s"`, candidate.Name)) - continue - } - - if err = UpdateAPIBlockDevice(ctx, cl, metrics, blockDevice, candidate); err != nil { - log.Error(err, "[RunBlockDeviceController] unable to update blockDevice, name: %s", blockDevice.Name) - continue - } - - log.Info(fmt.Sprintf(`[RunBlockDeviceController] updated APIBlockDevice, name: %s`, blockDevice.Name)) - continue - } - - device, err := CreateAPIBlockDevice(ctx, cl, metrics, candidate) - if err != nil { - log.Error(err, fmt.Sprintf("[RunBlockDeviceController] unable to create block device blockDevice, name: %s", candidate.Name)) - continue - } - log.Info(fmt.Sprintf("[RunBlockDeviceController] created new APIBlockDevice: %s", candidate.Name)) - - // add new api device to the map, so it won't be deleted as fantom - apiBlockDevices[candidate.Name] = *device - } - - // delete api device if device no longer exists, but we still have its api resource - RemoveDeprecatedAPIDevices(ctx, cl, log, metrics, candidates, apiBlockDevices, cfg.NodeName) - - log.Info("[RunBlockDeviceController] END reconcile of block devices") - metrics.ReconcileDuration(BlockDeviceCtrlName).Observe(metrics.GetEstimatedTimeInSeconds(reconcileStart)) - metrics.ReconcilesCountTotal(BlockDeviceCtrlName).Inc() - - return false -} - -func hasBlockDeviceDiff(blockDevice v1alpha1.BlockDevice, candidate internal.BlockDeviceCandidate) bool { - return candidate.NodeName != blockDevice.Status.NodeName || - candidate.Consumable != blockDevice.Status.Consumable || - candidate.PVUuid != blockDevice.Status.PVUuid || - candidate.VGUuid != blockDevice.Status.VGUuid || - candidate.PartUUID != blockDevice.Status.PartUUID || - candidate.LVMVolumeGroupName != blockDevice.Status.LVMVolumeGroupName || - candidate.ActualVGNameOnTheNode != blockDevice.Status.ActualVGNameOnTheNode || - candidate.Wwn != blockDevice.Status.Wwn || - candidate.Serial != blockDevice.Status.Serial || - candidate.Path != blockDevice.Status.Path || - candidate.Size.Value() != blockDevice.Status.Size.Value() || - candidate.Rota != blockDevice.Status.Rota || - candidate.Model != blockDevice.Status.Model || - candidate.HotPlug != blockDevice.Status.HotPlug || - candidate.Type != blockDevice.Status.Type || - candidate.FSType != blockDevice.Status.FsType || - candidate.MachineID != blockDevice.Status.MachineID || - !reflect.DeepEqual(ConfigureBlockDeviceLabels(blockDevice), blockDevice.Labels) -} - -// GetAPIBlockDevices returns map of BlockDevice resources with BlockDevice as a key. You might specify a selector to get a subset or -// leave it as nil to get all the resources. 
-func GetAPIBlockDevices(ctx context.Context, cl client.Client, metrics monitoring.Metrics, selector *metav1.LabelSelector) (map[string]v1alpha1.BlockDevice, error) { - list := &v1alpha1.BlockDeviceList{} - s, err := metav1.LabelSelectorAsSelector(selector) - if err != nil { - return nil, err - } - if s == labels.Nothing() { - s = nil - } - start := time.Now() - err = cl.List(ctx, list, &client.ListOptions{LabelSelector: s}) - metrics.APIMethodsDuration(BlockDeviceCtrlName, "list").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.APIMethodsExecutionCount(BlockDeviceCtrlName, "list").Inc() - if err != nil { - metrics.APIMethodsErrors(BlockDeviceCtrlName, "list").Inc() - return nil, err - } - - result := make(map[string]v1alpha1.BlockDevice, len(list.Items)) - for _, item := range list.Items { - result[item.Name] = item - } - - return result, nil -} - -func RemoveDeprecatedAPIDevices( - ctx context.Context, - cl client.Client, - log logger.Logger, - metrics monitoring.Metrics, - candidates []internal.BlockDeviceCandidate, - apiBlockDevices map[string]v1alpha1.BlockDevice, - nodeName string, -) { - actualCandidates := make(map[string]struct{}, len(candidates)) - for _, candidate := range candidates { - actualCandidates[candidate.Name] = struct{}{} - } - - for name, device := range apiBlockDevices { - if shouldDeleteBlockDevice(device, actualCandidates, nodeName) { - err := DeleteAPIBlockDevice(ctx, cl, metrics, &device) - if err != nil { - log.Error(err, fmt.Sprintf("[RunBlockDeviceController] unable to delete APIBlockDevice, name: %s", name)) - continue - } - - delete(apiBlockDevices, name) - log.Info(fmt.Sprintf("[RunBlockDeviceController] device deleted, name: %s", name)) - } - } -} - -func shouldDeleteBlockDevice(bd v1alpha1.BlockDevice, actualCandidates map[string]struct{}, nodeName string) bool { - if bd.Status.NodeName == nodeName && - bd.Status.Consumable && - isBlockDeviceDeprecated(bd.Name, actualCandidates) { - return true - } - - return false -} - -func isBlockDeviceDeprecated(blockDevice string, actualCandidates map[string]struct{}) bool { - _, ok := actualCandidates[blockDevice] - return !ok -} - -func GetBlockDeviceCandidates(log logger.Logger, cfg config.Options, sdsCache *cache.Cache) []internal.BlockDeviceCandidate { - var candidates []internal.BlockDeviceCandidate - devices, _ := sdsCache.GetDevices() - if len(devices) == 0 { - log.Debug("[GetBlockDeviceCandidates] no devices found, returns empty candidates") - return candidates - } - - filteredDevices, err := FilterDevices(log, devices) - if err != nil { - log.Error(err, "[GetBlockDeviceCandidates] unable to filter devices") - return nil - } - - if len(filteredDevices) == 0 { - log.Debug("[GetBlockDeviceCandidates] no filtered devices left, returns empty candidates") - return candidates - } - - pvs, _ := sdsCache.GetPVs() - if len(pvs) == 0 { - log.Debug("[GetBlockDeviceCandidates] no PVs found") - } - - var delFlag bool - candidates = make([]internal.BlockDeviceCandidate, 0, len(filteredDevices)) - - for _, device := range filteredDevices { - log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] Process device: %+v", device)) - candidate := internal.BlockDeviceCandidate{ - NodeName: cfg.NodeName, - Consumable: CheckConsumable(device), - Wwn: device.Wwn, - Serial: device.Serial, - Path: device.Name, - Size: device.Size, - Rota: device.Rota, - Model: device.Model, - HotPlug: device.HotPlug, - KName: device.KName, - PkName: device.PkName, - Type: device.Type, - FSType: device.FSType, - MachineID: cfg.MachineID, - 
PartUUID: device.PartUUID, - } - - log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] Get following candidate: %+v", candidate)) - candidateName := CreateCandidateName(log, candidate, devices) - - if candidateName == "" { - log.Trace("[GetBlockDeviceCandidates] candidateName is empty. Skipping device") - continue - } - - candidate.Name = candidateName - log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] Generated a unique candidate name: %s", candidate.Name)) - - delFlag = false - for _, pv := range pvs { - if pv.PVName == device.Name { - log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] The device is a PV. Found PV name: %s", pv.PVName)) - if candidate.FSType == internal.LVMFSType { - hasTag, lvmVGName := CheckTag(pv.VGTags) - if hasTag { - log.Debug(fmt.Sprintf("[GetBlockDeviceCandidates] PV %s of BlockDevice %s has tag, fill the VG information", pv.PVName, candidate.Name)) - candidate.PVUuid = pv.PVUuid - candidate.VGUuid = pv.VGUuid - candidate.ActualVGNameOnTheNode = pv.VGName - candidate.LVMVolumeGroupName = lvmVGName - } else { - if len(pv.VGName) != 0 { - log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] The device is a PV with VG named %s that lacks our tag %s. Removing it from Kubernetes", pv.VGName, internal.LVMTags[0])) - delFlag = true - } else { - candidate.PVUuid = pv.PVUuid - } - } - } - } - } - log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] delFlag: %t", delFlag)) - if delFlag { - continue - } - log.Trace(fmt.Sprintf("[GetBlockDeviceCandidates] configured candidate %+v", candidate)) - candidates = append(candidates, candidate) - } - - return candidates -} - -func FilterDevices(log logger.Logger, devices []internal.Device) ([]internal.Device, error) { - log.Trace(fmt.Sprintf("[filterDevices] devices before type filtration: %+v", devices)) - - validTypes := make([]internal.Device, 0, len(devices)) - - for _, device := range devices { - if !strings.HasPrefix(device.Name, internal.DRBDName) && - hasValidType(device.Type) && - hasValidFSType(device.FSType) { - validTypes = append(validTypes, device) - } - } - - log.Trace(fmt.Sprintf("[filterDevices] devices after type filtration: %+v", validTypes)) - - pkNames := make(map[string]struct{}, len(validTypes)) - for _, device := range devices { - if device.PkName != "" { - log.Trace(fmt.Sprintf("[filterDevices] find parent %s for child : %+v.", device.PkName, device)) - pkNames[device.PkName] = struct{}{} - } - } - log.Trace(fmt.Sprintf("[filterDevices] pkNames: %+v", pkNames)) - - filtered := make([]internal.Device, 0, len(validTypes)) - for _, device := range validTypes { - if !isParent(device.KName, pkNames) || device.FSType == internal.LVMFSType { - validSize, err := hasValidSize(device.Size) - if err != nil { - return nil, err - } - - if validSize { - filtered = append(filtered, device) - } - } - } - - log.Trace(fmt.Sprintf("[filterDevices] final filtered devices: %+v", filtered)) - - return filtered, nil -} - -func hasValidSize(size resource.Quantity) (bool, error) { - limitSize, err := resource.ParseQuantity(internal.BlockDeviceValidSize) - if err != nil { - return false, err - } - - return size.Value() >= limitSize.Value(), nil -} - -func isParent(kName string, pkNames map[string]struct{}) bool { - _, ok := pkNames[kName] - return ok -} - -func hasValidType(deviceType string) bool { - for _, invalidType := range internal.InvalidDeviceTypes { - if deviceType == invalidType { - return false - } - } - - return true -} - -func hasValidFSType(fsType string) bool { - if fsType == "" { - return true - } - - for _, allowedType := 
range internal.AllowedFSTypes { - if fsType == allowedType { - return true - } - } - - return false -} - -func CheckConsumable(device internal.Device) bool { - if device.MountPoint != "" { - return false - } - - if device.FSType != "" { - return false - } - - if device.HotPlug { - return false - } - - return true -} - -func CheckTag(tags string) (bool, string) { - if !strings.Contains(tags, internal.LVMTags[0]) { - return false, "" - } - - splitTags := strings.Split(tags, ",") - for _, tag := range splitTags { - if strings.HasPrefix(tag, "storage.deckhouse.io/lvmVolumeGroupName") { - kv := strings.Split(tag, "=") - return true, kv[1] - } - } - - return true, "" -} - -func CreateCandidateName(log logger.Logger, candidate internal.BlockDeviceCandidate, devices []internal.Device) string { - if len(candidate.Serial) == 0 { - log.Trace(fmt.Sprintf("[CreateCandidateName] Serial number is empty for device: %s", candidate.Path)) - if candidate.Type == internal.PartType { - if len(candidate.PartUUID) == 0 { - log.Warning(fmt.Sprintf("[CreateCandidateName] Type = part and cannot get PartUUID; skipping this device, path: %s", candidate.Path)) - return "" - } - log.Trace(fmt.Sprintf("[CreateCandidateName] Type = part and PartUUID is not empty; skiping getting serial number for device: %s", candidate.Path)) - } else { - log.Debug(fmt.Sprintf("[CreateCandidateName] Serial number is empty and device type is not part; trying to obtain serial number or its equivalent for device: %s, with type: %s", candidate.Path, candidate.Type)) - - switch candidate.Type { - case internal.MultiPathType: - log.Debug(fmt.Sprintf("[CreateCandidateName] device %s type = %s; get serial number from parent device.", candidate.Path, candidate.Type)) - log.Trace(fmt.Sprintf("[CreateCandidateName] device: %+v. Device list: %+v", candidate, devices)) - serial, err := getSerialForMultipathDevice(candidate, devices) - if err != nil { - log.Warning(fmt.Sprintf("[CreateCandidateName] Unable to obtain serial number or its equivalent; skipping device: %s. Error: %s", candidate.Path, err)) - return "" - } - candidate.Serial = serial - log.Info(fmt.Sprintf("[CreateCandidateName] Successfully obtained serial number or its equivalent: %s for device: %s", candidate.Serial, candidate.Path)) - default: - isMdRaid := false - matched, err := regexp.MatchString(`raid.*`, candidate.Type) - if err != nil { - log.Error(err, "[CreateCandidateName] failed to match regex - unable to determine if the device is an mdraid. Attempting to retrieve serial number directly from the device") - } else if matched { - log.Trace("[CreateCandidateName] device is mdraid") - isMdRaid = true - } - serial, err := readSerialBlockDevice(candidate.Path, isMdRaid) - if err != nil { - log.Warning(fmt.Sprintf("[CreateCandidateName] Unable to obtain serial number or its equivalent; skipping device: %s. Error: %s", candidate.Path, err)) - return "" - } - log.Info(fmt.Sprintf("[CreateCandidateName] Successfully obtained serial number or its equivalent: %s for device: %s", serial, candidate.Path)) - candidate.Serial = serial - } - } - } - - log.Trace(fmt.Sprintf("[CreateCandidateName] Serial number is now: %s. 
Creating candidate name", candidate.Serial)) - return CreateUniqDeviceName(candidate) -} - -func CreateUniqDeviceName(can internal.BlockDeviceCandidate) string { - temp := fmt.Sprintf("%s%s%s%s%s", can.NodeName, can.Wwn, can.Model, can.Serial, can.PartUUID) - s := fmt.Sprintf("dev-%x", sha1.Sum([]byte(temp))) - return s -} - -func readSerialBlockDevice(deviceName string, isMdRaid bool) (string, error) { - if len(deviceName) < 6 { - return "", fmt.Errorf("device name is too short") - } - strPath := fmt.Sprintf("/sys/block/%s/serial", deviceName[5:]) - - if isMdRaid { - strPath = fmt.Sprintf("/sys/block/%s/md/uuid", deviceName[5:]) - } - - serial, err := os.ReadFile(strPath) - if err != nil { - return "", fmt.Errorf("unable to read serial from block device: %s, error: %s", deviceName, err) - } - if len(serial) == 0 { - return "", fmt.Errorf("serial is empty") - } - return string(serial), nil -} - -func UpdateAPIBlockDevice(ctx context.Context, kc client.Client, metrics monitoring.Metrics, blockDevice v1alpha1.BlockDevice, candidate internal.BlockDeviceCandidate) error { - blockDevice.Status = v1alpha1.BlockDeviceStatus{ - Type: candidate.Type, - FsType: candidate.FSType, - NodeName: candidate.NodeName, - Consumable: candidate.Consumable, - PVUuid: candidate.PVUuid, - VGUuid: candidate.VGUuid, - PartUUID: candidate.PartUUID, - LVMVolumeGroupName: candidate.LVMVolumeGroupName, - ActualVGNameOnTheNode: candidate.ActualVGNameOnTheNode, - Wwn: candidate.Wwn, - Serial: candidate.Serial, - Path: candidate.Path, - Size: *resource.NewQuantity(candidate.Size.Value(), resource.BinarySI), - Model: candidate.Model, - Rota: candidate.Rota, - HotPlug: candidate.HotPlug, - MachineID: candidate.MachineID, - } - - blockDevice.Labels = ConfigureBlockDeviceLabels(blockDevice) - - start := time.Now() - err := kc.Update(ctx, &blockDevice) - metrics.APIMethodsDuration(BlockDeviceCtrlName, "update").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.APIMethodsExecutionCount(BlockDeviceCtrlName, "update").Inc() - if err != nil { - metrics.APIMethodsErrors(BlockDeviceCtrlName, "update").Inc() - return err - } - - return nil -} - -func ConfigureBlockDeviceLabels(blockDevice v1alpha1.BlockDevice) map[string]string { - var lbls map[string]string - if blockDevice.Labels == nil { - lbls = make(map[string]string, 16) - } else { - lbls = make(map[string]string, len(blockDevice.Labels)) - } - - for key, value := range blockDevice.Labels { - lbls[key] = value - } - - slug.Lowercase = false - lbls[internal.MetadataNameLabelKey] = slug.Make(blockDevice.ObjectMeta.Name) - lbls[internal.HostNameLabelKey] = slug.Make(blockDevice.Status.NodeName) - lbls[internal.BlockDeviceTypeLabelKey] = slug.Make(blockDevice.Status.Type) - lbls[internal.BlockDeviceFSTypeLabelKey] = slug.Make(blockDevice.Status.FsType) - lbls[internal.BlockDevicePVUUIDLabelKey] = blockDevice.Status.PVUuid - lbls[internal.BlockDeviceVGUUIDLabelKey] = blockDevice.Status.VGUuid - lbls[internal.BlockDevicePartUUIDLabelKey] = blockDevice.Status.PartUUID - lbls[internal.BlockDeviceLVMVolumeGroupNameLabelKey] = slug.Make(blockDevice.Status.LVMVolumeGroupName) - lbls[internal.BlockDeviceActualVGNameLabelKey] = slug.Make(blockDevice.Status.ActualVGNameOnTheNode) - lbls[internal.BlockDeviceWWNLabelKey] = slug.Make(blockDevice.Status.Wwn) - lbls[internal.BlockDeviceSerialLabelKey] = slug.Make(blockDevice.Status.Serial) - lbls[internal.BlockDeviceSizeLabelKey] = blockDevice.Status.Size.String() - lbls[internal.BlockDeviceModelLabelKey] = 
slug.Make(blockDevice.Status.Model) - lbls[internal.BlockDeviceRotaLabelKey] = strconv.FormatBool(blockDevice.Status.Rota) - lbls[internal.BlockDeviceHotPlugLabelKey] = strconv.FormatBool(blockDevice.Status.HotPlug) - lbls[internal.BlockDeviceMachineIDLabelKey] = slug.Make(blockDevice.Status.MachineID) - - return lbls -} - -func CreateAPIBlockDevice(ctx context.Context, kc client.Client, metrics monitoring.Metrics, candidate internal.BlockDeviceCandidate) (*v1alpha1.BlockDevice, error) { - blockDevice := &v1alpha1.BlockDevice{ - ObjectMeta: metav1.ObjectMeta{ - Name: candidate.Name, - }, - Status: v1alpha1.BlockDeviceStatus{ - Type: candidate.Type, - FsType: candidate.FSType, - NodeName: candidate.NodeName, - Consumable: candidate.Consumable, - PVUuid: candidate.PVUuid, - VGUuid: candidate.VGUuid, - PartUUID: candidate.PartUUID, - LVMVolumeGroupName: candidate.LVMVolumeGroupName, - ActualVGNameOnTheNode: candidate.ActualVGNameOnTheNode, - Wwn: candidate.Wwn, - Serial: candidate.Serial, - Path: candidate.Path, - Size: *resource.NewQuantity(candidate.Size.Value(), resource.BinarySI), - Model: candidate.Model, - Rota: candidate.Rota, - MachineID: candidate.MachineID, - }, - } - - blockDevice.Labels = ConfigureBlockDeviceLabels(*blockDevice) - start := time.Now() - - err := kc.Create(ctx, blockDevice) - metrics.APIMethodsDuration(BlockDeviceCtrlName, "create").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.APIMethodsExecutionCount(BlockDeviceCtrlName, "create").Inc() - if err != nil { - metrics.APIMethodsErrors(BlockDeviceCtrlName, "create").Inc() - return nil, err - } - return blockDevice, nil -} - -func DeleteAPIBlockDevice(ctx context.Context, kc client.Client, metrics monitoring.Metrics, device *v1alpha1.BlockDevice) error { - start := time.Now() - err := kc.Delete(ctx, device) - metrics.APIMethodsDuration(BlockDeviceCtrlName, "delete").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.APIMethodsExecutionCount(BlockDeviceCtrlName, "delete").Inc() - if err != nil { - metrics.APIMethodsErrors(BlockDeviceCtrlName, "delete").Inc() - return err - } - return nil -} - -func ReTag(ctx context.Context, log logger.Logger, metrics monitoring.Metrics) error { - // thin pool - log.Debug("[ReTag] start re-tagging LV") - start := time.Now() - lvs, cmdStr, _, err := utils.GetAllLVs(ctx) - metrics.UtilsCommandsDuration(BlockDeviceCtrlName, "lvs").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.UtilsCommandsExecutionCount(BlockDeviceCtrlName, "lvs").Inc() - log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) - if err != nil { - metrics.UtilsCommandsErrorsCount(BlockDeviceCtrlName, "lvs").Inc() - log.Error(err, "[ReTag] unable to GetAllLVs") - return err - } - - for _, lv := range lvs { - tags := strings.Split(lv.LvTags, ",") - for _, tag := range tags { - if strings.Contains(tag, internal.LVMTags[0]) { - continue - } - - if strings.Contains(tag, internal.LVMTags[1]) { - start = time.Now() - cmdStr, err = utils.LVChangeDelTag(lv, tag) - metrics.UtilsCommandsDuration(BlockDeviceCtrlName, "lvchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.UtilsCommandsExecutionCount(BlockDeviceCtrlName, "lvchange").Inc() - log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) - if err != nil { - metrics.UtilsCommandsErrorsCount(BlockDeviceCtrlName, "lvchange").Inc() - log.Error(err, "[ReTag] unable to LVChangeDelTag") - return err - } - - start = time.Now() - cmdStr, err = utils.VGChangeAddTag(lv.VGName, internal.LVMTags[0]) - 
metrics.UtilsCommandsDuration(BlockDeviceCtrlName, "vgchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.UtilsCommandsExecutionCount(BlockDeviceCtrlName, "vgchange").Inc() - log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) - if err != nil { - metrics.UtilsCommandsErrorsCount(BlockDeviceCtrlName, "vgchange").Inc() - log.Error(err, "[ReTag] unable to VGChangeAddTag") - return err - } - } - } - } - log.Debug("[ReTag] end re-tagging LV") - - log.Debug("[ReTag] start re-tagging LVM") - start = time.Now() - vgs, cmdStr, _, err := utils.GetAllVGs(ctx) - metrics.UtilsCommandsDuration(BlockDeviceCtrlName, "vgs").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.UtilsCommandsExecutionCount(BlockDeviceCtrlName, "vgs").Inc() - log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) - if err != nil { - metrics.UtilsCommandsErrorsCount(BlockDeviceCtrlName, cmdStr).Inc() - log.Error(err, "[ReTag] unable to GetAllVGs") - return err - } - - for _, vg := range vgs { - tags := strings.Split(vg.VGTags, ",") - for _, tag := range tags { - if strings.Contains(tag, internal.LVMTags[0]) { - continue - } - - if strings.Contains(tag, internal.LVMTags[1]) { - start = time.Now() - cmdStr, err = utils.VGChangeDelTag(vg.VGName, tag) - metrics.UtilsCommandsDuration(BlockDeviceCtrlName, "vgchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.UtilsCommandsExecutionCount(BlockDeviceCtrlName, "vgchange").Inc() - log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) - if err != nil { - metrics.UtilsCommandsErrorsCount(BlockDeviceCtrlName, "vgchange").Inc() - log.Error(err, "[ReTag] unable to VGChangeDelTag") - return err - } - - start = time.Now() - cmdStr, err = utils.VGChangeAddTag(vg.VGName, internal.LVMTags[0]) - metrics.UtilsCommandsDuration(BlockDeviceCtrlName, "vgchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) - metrics.UtilsCommandsExecutionCount(BlockDeviceCtrlName, "vgchange").Inc() - log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) - if err != nil { - metrics.UtilsCommandsErrorsCount(BlockDeviceCtrlName, "vgchange").Inc() - log.Error(err, "[ReTag] unable to VGChangeAddTag") - return err - } - } - } - } - log.Debug("[ReTag] stop re-tagging LVM") - - return nil -} - -func getSerialForMultipathDevice(candidate internal.BlockDeviceCandidate, devices []internal.Device) (string, error) { - parentDevice := getParentDevice(candidate.PkName, devices) - if parentDevice.Name == "" { - err := fmt.Errorf("parent device %s not found for multipath device: %s in device list", candidate.PkName, candidate.Path) - return "", err - } - - if parentDevice.FSType != internal.MultiPathMemberFSType { - err := fmt.Errorf("parent device %s for multipath device %s is not a multipath member (fstype != %s)", parentDevice.Name, candidate.Path, internal.MultiPathMemberFSType) - return "", err - } - - if parentDevice.Serial == "" { - err := fmt.Errorf("serial number is empty for parent device %s", parentDevice.Name) - return "", err - } - - return parentDevice.Serial, nil -} - -func getParentDevice(pkName string, devices []internal.Device) internal.Device { - for _, device := range devices { - if device.Name == pkName { - return device - } - } - return internal.Device{} -} diff --git a/images/agent/src/pkg/controller/const.go b/images/agent/src/pkg/controller/const.go new file mode 100644 index 00000000..a21f8ba8 --- /dev/null +++ b/images/agent/src/pkg/controller/const.go @@ -0,0 +1,11 @@ +package controller + +const ( + CreateReconcile reconcileType = "Create" + UpdateReconcile 
reconcileType = "Update" + DeleteReconcile reconcileType = "Delete" +) + +type ( + reconcileType string +) diff --git a/images/agent/src/pkg/controller/controller.go b/images/agent/src/pkg/controller/controller.go new file mode 100644 index 00000000..568ea4ba --- /dev/null +++ b/images/agent/src/pkg/controller/controller.go @@ -0,0 +1,188 @@ +package controller + +import ( + "agent/pkg/logger" + "context" + "fmt" + "reflect" + "time" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/source" + + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +type ReconcileRequest[T client.Object] struct { + Object T +} + +type Result struct { + RequeueAfter time.Duration +} + +type Named interface { + Name() string +} + +type Reconciler[T client.Object] interface { + Named + ShouldReconcileUpdate(objectOld T, objectNew T) bool + Reconcile(context.Context, ReconcileRequest[T]) (Result, error) +} + +type Discoverer interface { + Named + Discover(context.Context) (Result, error) +} + +func AddReconciler[T client.Object]( + mgr manager.Manager, + log logger.Logger, + reconciler Reconciler[T], +) error { + t := reflect.TypeFor[T]() + if t.Kind() != reflect.Pointer { + panic("T is not a pointer") + } + + if t.Elem().Kind() != reflect.Struct { + panic("T is not a struct pointer") + } + + tname := t.Elem().Name() + + mgrCache := mgr.GetCache() + + c, err := controller.New( + reconciler.Name(), + mgr, + controller.Options{ + Reconciler: makeReconcileDispatcher(mgr, log, reconciler), + }, + ) + if err != nil { + return err + } + + var obj T + err = c.Watch( + source.Kind( + mgrCache, + obj, + handler.TypedFuncs[T, reconcile.Request]{ + CreateFunc: func( + _ context.Context, + e event.TypedCreateEvent[T], + q workqueue.TypedRateLimitingInterface[reconcile.Request], + ) { + log.Info(fmt.Sprintf("createFunc got a create event for the %s, name: %s", tname, e.Object.GetName())) + + request := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: e.Object.GetNamespace(), Name: e.Object.GetName()}} + q.Add(request) + + log.Info(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] createFunc added a request for the LVMVolumeGroup %s to the Reconcilers queue", e.Object.GetName())) + }, + UpdateFunc: func( + _ context.Context, + e event.TypedUpdateEvent[T], + q workqueue.TypedRateLimitingInterface[reconcile.Request], + ) { + log.Info(fmt.Sprintf("UpdateFunc got a update event for the %s %s", tname, e.ObjectNew.GetName())) + + if !reconciler.ShouldReconcileUpdate(e.ObjectOld, e.ObjectNew) { + log.Debug(fmt.Sprintf("updateFunc skipped a request for the %s %s to the Reconcilers queue", tname, e.ObjectNew.GetName())) + return + } + + request := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: e.ObjectNew.GetNamespace(), Name: e.ObjectNew.GetName()}} + q.Add(request) + + log.Info(fmt.Sprintf("updateFunc added a request for the %s %s to the Reconcilers queue", tname, e.ObjectNew.GetName())) + }, + }, + ), + ) + + return nil +} + +func AddDiscoverer( + mgr manager.Manager, + log logger.Logger, + discoverer Discoverer, +) (discover func(context.Context) (Result, error), err error) { + kCtrl, err := controller.New( + discoverer.Name(), + mgr, + controller.Options{ + 
Reconciler: makeDiscovererDispatcher(log, discoverer), + }, + ) + if err != nil { + return nil, err + } + + return func(ctx context.Context) (Result, error) { + res, err := kCtrl.Reconcile(ctx, reconcile.Request{}) + return Result{RequeueAfter: res.RequeueAfter}, err + }, nil +} + +func makeDiscovererDispatcher(log logger.Logger, discoverer Discoverer) reconcile.Func { + return reconcile.Func(func(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) { + log.Info(fmt.Sprintf("[DiscovererDispatcher] Discoverer starts")) + + result, err := discoverer.Discover(ctx) + + return reconcile.Result{ + RequeueAfter: result.RequeueAfter, + }, err + }) +} + +func makeReconcileDispatcher[T client.Object]( + mgr manager.Manager, + log logger.Logger, + reconciler Reconciler[T], +) reconcile.TypedReconciler[reconcile.Request] { + cl := mgr.GetClient() + return reconcile.Func(func(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { + + // load object being reconciled + log.Info(fmt.Sprintf("[ReconcileDispatcher] Reconciler starts to reconcile the request %s", req.NamespacedName.String())) + + if req.Name == "" { + + } + + var obj T + if err := cl.Get(ctx, req.NamespacedName, obj); err != nil { + if errors.IsNotFound(err) { + log.Warning(fmt.Sprintf("[ReconcileDispatcher] seems like the object was deleted as unable to get it, err: %s. Stop to reconcile", err.Error())) + return reconcile.Result{}, nil + } + + log.Error(err, fmt.Sprintf("[ReconcileDispatcher] unable to get an object by NamespacedName %s", req.NamespacedName.String())) + return reconcile.Result{}, err + } + + // + result, err := reconciler.Reconcile( + ctx, + ReconcileRequest[T]{ + Object: obj, + }, + ) + return reconcile.Result{ + RequeueAfter: result.RequeueAfter, + }, err + }) +} diff --git a/images/agent/src/pkg/controller/controller_suite_test.go b/images/agent/src/pkg/controller/controller_suite_test.go deleted file mode 100644 index 9ebb5197..00000000 --- a/images/agent/src/pkg/controller/controller_suite_test.go +++ /dev/null @@ -1,45 +0,0 @@ -/* -Copyright 2023 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package controller_test - -import ( - "testing" - - "github.com/deckhouse/sds-node-configurator/api/v1alpha1" - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestController(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "Controller Suite") -} - -func NewFakeClient() client.WithWatch { - s := scheme.Scheme - _ = metav1.AddMetaToScheme(s) - _ = v1alpha1.AddToScheme(s) - - builder := fake.NewClientBuilder().WithScheme(s) - - cl := builder.Build() - return cl -} diff --git a/images/agent/src/pkg/controller/lvg/reconciler.go b/images/agent/src/pkg/controller/lvg/reconciler.go new file mode 100644 index 00000000..17e27ed9 --- /dev/null +++ b/images/agent/src/pkg/controller/lvg/reconciler.go @@ -0,0 +1,1536 @@ +package lvg + +import ( + "agent/internal" + "agent/pkg/cache" + "agent/pkg/controller" + "agent/pkg/logger" + "agent/pkg/monitoring" + "agent/pkg/utils" + "context" + "errors" + "fmt" + "reflect" + "slices" + "strconv" + "strings" + "time" + + "github.com/cloudflare/cfssl/log" + "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const Name = "lvm-volume-group-watcher-controller" +const LVGMetadateNameLabelKey = "kubernetes.io/metadata.name" + +const ( + Local = "Local" + Shared = "Shared" + + Failed = "Failed" + + NonOperational = "NonOperational" + + deletionProtectionAnnotation = "storage.deckhouse.io/deletion-protection" + + LVMVolumeGroupTag = "storage.deckhouse.io/lvmVolumeGroupName" +) + +type Reconciler struct { + cl client.Client + log logger.Logger + metrics monitoring.Metrics + sdsCache *cache.Cache + opts Options +} + +type Options struct { + NodeName string + BlockDeviceScanIntervalSec time.Duration + VolumeGroupScanIntervalSec time.Duration +} + +func NewReconciler( + cl client.Client, + log logger.Logger, + metrics monitoring.Metrics, + sdsCache *cache.Cache, + opts Options, +) *Reconciler { + return &Reconciler{ + cl: cl, + log: log, + metrics: metrics, + sdsCache: sdsCache, + opts: opts, + } +} + +func (r *Reconciler) Name() string { + return Name +} + +func (r *Reconciler) Reconcile(ctx context.Context, request controller.ReconcileRequest[*v1alpha1.LVMVolumeGroup]) (controller.Result, error) { + r.log.Info(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] Reconciler starts to reconcile the request %s", request.Object.Name)) + + lvg := request.Object + + belongs := checkIfLVGBelongsToNode(lvg, r.opts.NodeName) + if !belongs { + r.log.Info(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] the LVMVolumeGroup %s does not belong to the node %s", lvg.Name, r.opts.NodeName)) + return controller.Result{}, nil + } + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] the LVMVolumeGroup %s belongs to the node %s. 
Starts to reconcile", lvg.Name, r.opts.NodeName)) + + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] tries to add the finalizer %s to the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name)) + added, err := r.addLVGFinalizerIfNotExist(ctx, lvg) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add the finalizer %s to the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name)) + return controller.Result{}, err + } + + if added { + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] successfully added a finalizer %s to the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name)) + } else { + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] no need to add a finalizer %s to the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name)) + } + + // this case handles the situation when a user decides to remove LVMVolumeGroup resource without created VG + deleted, err := r.deleteLVGIfNeeded(ctx, lvg) + if err != nil { + return controller.Result{}, err + } + + if deleted { + r.log.Info(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] the LVMVolumeGroup %s was deleted, stop the reconciliation", lvg.Name)) + return controller.Result{}, nil + } + + if _, exist := lvg.Labels[internal.LVGUpdateTriggerLabel]; exist { + delete(lvg.Labels, internal.LVGUpdateTriggerLabel) + err = r.cl.Update(ctx, lvg) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to update the LVMVolumeGroup %s", lvg.Name)) + return controller.Result{}, err + } + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] successfully removed the label %s from the LVMVolumeGroup %s", internal.LVGUpdateTriggerLabel, lvg.Name)) + } + + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] tries to get block device resources for the LVMVolumeGroup %s by the selector %v", lvg.Name, lvg.Spec.BlockDeviceSelector.MatchLabels)) + blockDevices, err := r.getAPIBlockDevices(ctx, lvg.Spec.BlockDeviceSelector) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to get BlockDevices. Retry in %s", r.opts.BlockDeviceScanIntervalSec.String())) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, "NoBlockDevices", fmt.Sprintf("unable to get block devices resources, err: %s", err.Error())) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add a condition %s to the LVMVolumeGroup %s. Retry in %s", internal.TypeVGConfigurationApplied, lvg.Name, r.opts.BlockDeviceScanIntervalSec.String())) + } + + return controller.Result{RequeueAfter: r.opts.BlockDeviceScanIntervalSec}, nil + } + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] successfully got block device resources for the LVMVolumeGroup %s by the selector %v", lvg.Name, lvg.Spec.BlockDeviceSelector.MatchLabels)) + + valid, reason := validateSpecBlockDevices(lvg, blockDevices) + if !valid { + r.log.Warning(fmt.Sprintf("[RunLVMVolumeGroupController] validation failed for the LVMVolumeGroup %s, reason: %s", lvg.Name, reason)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonValidationFailed, reason) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add a condition %s to the LVMVolumeGroup %s. 
Retry in %s", internal.TypeVGConfigurationApplied, lvg.Name, cfg.VolumeGroupScanIntervalSec.String())) + return controller.Result{}, err + } + + return controller.Result{}, nil + } + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] successfully validated BlockDevices of the LVMVolumeGroup %s", lvg.Name)) + + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] tries to add label %s to the LVMVolumeGroup %s", LVGMetadateNameLabelKey, cfg.NodeName)) + added, err = r.addLVGLabelIfNeeded(ctx, lvg, LVGMetadateNameLabelKey, lvg.Name) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add label %s to the LVMVolumeGroup %s", LVGMetadateNameLabelKey, lvg.Name)) + return controller.Result{}, err + } + + if added { + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] successfully added label %s to the LVMVolumeGroup %s", LVGMetadateNameLabelKey, lvg.Name)) + } else { + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] no need to add label %s to the LVMVolumeGroup %s", LVGMetadateNameLabelKey, lvg.Name)) + } + + // We do this after BlockDevices validation and node belonging check to prevent multiple updates by all agents pods + bds, _ := r.sdsCache.GetDevices() + if len(bds) == 0 { + r.log.Warning(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] no block devices in the cache, add the LVMVolumeGroup %s to requeue", lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, "CacheEmpty", "unable to apply configuration due to the cache's state") + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add a condition %s to the LVMVolumeGroup %s. Retry in %s", internal.TypeVGConfigurationApplied, lvg.Name, cfg.VolumeGroupScanIntervalSec.String())) + } + + return controller.Result{ + RequeueAfter: r.opts.VolumeGroupScanIntervalSec, + }, nil + } + + r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] tries to sync status and spec thin-pool AllicationLimit fields for the LVMVolumeGroup %s", lvg.Name)) + err = r.syncThinPoolsAllocationLimit(ctx, lvg) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to sync status and spec thin-pool AllocationLimit fields for the LVMVolumeGroup %s", lvg.Name)) + return controller.Result{}, err + } + + shouldRequeue, err := r.runEventReconcile(ctx, lvg, blockDevices) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to reconcile the LVMVolumeGroup %s", lvg.Name)) + } + + if shouldRequeue { + r.log.Warning(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] the LVMVolumeGroup %s event will be requeued in %s", lvg.Name, r.opts.VolumeGroupScanIntervalSec.String())) + return controller.Result{ + RequeueAfter: r.opts.VolumeGroupScanIntervalSec, + }, nil + } + r.log.Info(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] Reconciler successfully reconciled the LVMVolumeGroup %s", lvg.Name)) + + return controller.Result{}, nil + +} + +func (r *Reconciler) runEventReconcile( + ctx context.Context, + lvg *v1alpha1.LVMVolumeGroup, + blockDevices map[string]v1alpha1.BlockDevice, +) (bool, error) { + recType := r.identifyLVGReconcileFunc(lvg, sdsCache) + + switch recType { + case CreateReconcile: + r.log.Info(fmt.Sprintf("[runEventReconcile] CreateReconcile starts the reconciliation for the LVMVolumeGroup %s", lvg.Name)) + return r.reconcileLVGCreateFunc(ctx, lvg, blockDevices) + case UpdateReconcile: + 
r.log.Info(fmt.Sprintf("[runEventReconcile] UpdateReconcile starts the reconciliation for the LVMVolumeGroup %s", lvg.Name)) + return r.reconcileLVGUpdateFunc(ctx, lvg, blockDevices) + case DeleteReconcile: + r.log.Info(fmt.Sprintf("[runEventReconcile] DeleteReconcile starts the reconciliation for the LVMVolumeGroup %s", lvg.Name)) + return r.reconcileLVGDeleteFunc(ctx, lvg) + default: + r.log.Info(fmt.Sprintf("[runEventReconcile] no need to reconcile the LVMVolumeGroup %s", lvg.Name)) + } + return false, nil +} + +func (r *Reconciler) reconcileLVGDeleteFunc(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup) (bool, error) { + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] starts to reconcile the LVMVolumeGroup %s", lvg.Name)) + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] tries to add the condition %s status false to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + + // this check prevents the LVMVolumeGroup resource's infinity updating after a retry + for _, c := range lvg.Status.Conditions { + if c.Type == internal.TypeVGConfigurationApplied && c.Reason != internal.ReasonTerminating { + err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonTerminating, "trying to delete VG") + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to add the condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + return true, err + } + break + } + } + + _, exist := lvg.Annotations[deletionProtectionAnnotation] + if exist { + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] the LVMVolumeGroup %s has a deletion timestamp but also has a deletion protection annotation %s. Remove it to proceed the delete operation", lvg.Name, deletionProtectionAnnotation)) + err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonTerminating, fmt.Sprintf("to delete the LVG remove the annotation %s", deletionProtectionAnnotation)) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to add the condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + return true, err + } + + return false, nil + } + + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] check if VG %s of the LVMVolumeGroup %s uses LVs", lvg.Spec.ActualVGNameOnTheNode, lvg.Name)) + usedLVs := r.getLVForVG(lvg.Spec.ActualVGNameOnTheNode) + if len(usedLVs) > 0 { + err := fmt.Errorf("VG %s uses LVs: %v. Delete used LVs first", lvg.Spec.ActualVGNameOnTheNode, usedLVs) + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to reconcile LVG %s", lvg.Name)) + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] tries to add the condition %s status False to the LVMVolumeGroup %s due to LV does exist", internal.TypeVGConfigurationApplied, lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonTerminating, err.Error()) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to add the condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + return true, err + } + + return true, nil + } + + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] VG %s of the LVMVolumeGroup %s does not use any LV. 
Start to delete the VG", lvg.Spec.ActualVGNameOnTheNode, lvg.Name)) + err := r.deleteVGIfExist(lvg.Spec.ActualVGNameOnTheNode) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to delete VG %s", lvg.Spec.ActualVGNameOnTheNode)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonTerminating, err.Error()) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to add the condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + return true, err + } + + return true, err + } + + removed, err := r.removeLVGFinalizerIfExist(ctx, lvg) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to remove a finalizer %s from the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonTerminating, err.Error()) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to add the condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + return true, err + } + + if removed { + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] successfully removed a finalizer %s from the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name)) + } else { + r.log.Debug(fmt.Sprintf("[reconcileLVGDeleteFunc] no need to remove a finalizer %s from the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name)) + } + + err = r.deleteLVMVolumeGroup(ctx, lvg, r.opts.NodeName) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGDeleteFunc] unable to delete the LVMVolumeGroup %s", lvg.Name)) + return true, err + } + + r.log.Info(fmt.Sprintf("[reconcileLVGDeleteFunc] successfully reconciled VG %s of the LVMVolumeGroup %s", lvg.Spec.ActualVGNameOnTheNode, lvg.Name)) + return false, nil +} + +func (r *Reconciler) reconcileLVGUpdateFunc( + ctx context.Context, + lvg *v1alpha1.LVMVolumeGroup, + blockDevices map[string]v1alpha1.BlockDevice, +) (bool, error) { + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] starts to reconcile the LVMVolumeGroup %s", lvg.Name)) + + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] tries to validate the LVMVolumeGroup %s", lvg.Name)) + pvs, _ := r.sdsCache.GetPVs() + valid, reason := r.validateLVGForUpdateFunc(lvg, blockDevices) + if !valid { + r.log.Warning(fmt.Sprintf("[reconcileLVGUpdateFunc] the LVMVolumeGroup %s is not valid", lvg.Name)) + err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonValidationFailed, reason) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to add a condition %s reason %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, internal.ReasonValidationFailed, lvg.Name)) + } + + return true, err + } + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] successfully validated the LVMVolumeGroup %s", lvg.Name)) + + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] tries to get VG %s for the LVMVolumeGroup %s", lvg.Spec.ActualVGNameOnTheNode, lvg.Name)) + found, vg := tryGetVG(r.sdsCache, lvg.Spec.ActualVGNameOnTheNode) + if !found { + err := fmt.Errorf("VG %s not found", lvg.Spec.ActualVGNameOnTheNode) + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to reconcile the LVMVolumeGroup %s", lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, 
v1.ConditionFalse, internal.TypeVGConfigurationApplied, "VGNotFound", err.Error()) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + return true, err + } + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] VG %s found for the LVMVolumeGroup %s", vg.VGName, lvg.Name)) + + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] tries to check and update VG %s tag %s", lvg.Spec.ActualVGNameOnTheNode, internal.LVMTags[0])) + updated, err := r.updateVGTagIfNeeded(ctx, lvg, vg) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to update VG %s tag of the LVMVolumeGroup %s", vg.VGName, lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, "VGUpdateFailed", fmt.Sprintf("unable to update VG tag, err: %s", err.Error())) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + + return true, err + } + + if updated { + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] successfully updated VG %s tag of the LVMVolumeGroup %s", vg.VGName, lvg.Name)) + } else { + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] no need to update VG %s tag of the LVMVolumeGroup %s", vg.VGName, lvg.Name)) + } + + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] starts to resize PV of the LVMVolumeGroup %s", lvg.Name)) + err = r.resizePVIfNeeded(ctx, lvg) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to resize PV of the LVMVolumeGroup %s", lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, "PVResizeFailed", fmt.Sprintf("unable to resize PV, err: %s", err.Error())) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + return true, err + } + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] successfully ended the resize operation for PV of the LVMVolumeGroup %s", lvg.Name)) + + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] starts to extend VG %s of the LVMVolumeGroup %s", vg.VGName, lvg.Name)) + err = r.extendVGIfNeeded(ctx, lvg, vg, pvs, blockDevices) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to extend VG of the LVMVolumeGroup %s", lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, "VGExtendFailed", fmt.Sprintf("unable to extend VG, err: %s", err.Error())) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + + return true, err + } + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] successfully ended the extend operation for VG of the LVMVolumeGroup %s", lvg.Name)) + + if lvg.Spec.ThinPools != nil { + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] starts to reconcile thin-pools of the LVMVolumeGroup %s", lvg.Name)) + lvs, _ := r.sdsCache.GetLVs() + err = r.reconcileThinPoolsIfNeeded(ctx, lvg, vg, lvs) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to reconcile thin-pools of the LVMVolumeGroup %s", lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, 
internal.TypeVGConfigurationApplied, "ThinPoolReconcileFailed", fmt.Sprintf("unable to reconcile thin-pools, err: %s", err.Error())) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + return true, err + } + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] successfully reconciled thin-pools operation of the LVMVolumeGroup %s", lvg.Name)) + } + + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] tries to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionTrue, internal.TypeVGConfigurationApplied, "Applied", "configuration has been applied") + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGUpdateFunc] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + return true, err + } + r.log.Debug(fmt.Sprintf("[reconcileLVGUpdateFunc] successfully added a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + r.log.Info(fmt.Sprintf("[reconcileLVGUpdateFunc] successfully reconciled the LVMVolumeGroup %s", lvg.Name)) + + return false, nil +} + +func (r *Reconciler) reconcileLVGCreateFunc( + ctx context.Context, + lvg *v1alpha1.LVMVolumeGroup, + blockDevices map[string]v1alpha1.BlockDevice, +) (bool, error) { + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] starts to reconcile the LVMVolumeGroup %s", lvg.Name)) + + // this check prevents the LVMVolumeGroup resource's infinity updating after a retry + exist := false + for _, c := range lvg.Status.Conditions { + if c.Type == internal.TypeVGConfigurationApplied { + exist = true + break + } + } + + if !exist { + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] tries to add the condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonCreating, "trying to apply the configuration") + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGCreateFunc] unable to add the condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + return true, err + } + } + + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] tries to validate the LVMVolumeGroup %s", lvg.Name)) + valid, reason := r.validateLVGForCreateFunc(lvg, blockDevices) + if !valid { + r.log.Warning(fmt.Sprintf("[reconcileLVGCreateFunc] validation fails for the LVMVolumeGroup %s", lvg.Name)) + err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonValidationFailed, reason) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + + return true, err + } + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] successfully validated the LVMVolumeGroup %s", lvg.Name)) + + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] tries to create VG for the LVMVolumeGroup %s", lvg.Name)) + err := r.createVGComplex(lvg, blockDevices) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGCreateFunc] unable to create VG for the LVMVolumeGroup %s", lvg.Name)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, "VGCreationFailed", fmt.Sprintf("unable to create 
VG, err: %s", err.Error())) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + return true, err + } + r.log.Info(fmt.Sprintf("[reconcileLVGCreateFunc] successfully created VG for the LVMVolumeGroup %s", lvg.Name)) + + if lvg.Spec.ThinPools != nil { + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] the LVMVolumeGroup %s has thin-pools. Tries to create them", lvg.Name)) + + for _, tp := range lvg.Spec.ThinPools { + vgSize := countVGSizeByBlockDevices(blockDevices) + tpRequestedSize, err := getRequestedSizeFromString(tp.Size, vgSize) + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGCreateFunc] unable to get thin-pool %s requested size of the LVMVolumeGroup %s", tp.Name, lvg.Name)) + return false, err + } + + var cmd string + if utils.AreSizesEqualWithinDelta(tpRequestedSize, vgSize, internal.ResizeDelta) { + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] Thin-pool %s of the LVMVolumeGroup %s will be created with full VG space size", tp.Name, lvg.Name)) + cmd, err = utils.CreateThinPoolFullVGSpace(tp.Name, lvg.Spec.ActualVGNameOnTheNode) + } else { + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] Thin-pool %s of the LVMVolumeGroup %s will be created with size %s", tp.Name, lvg.Name, tpRequestedSize.String())) + cmd, err = utils.CreateThinPool(tp.Name, lvg.Spec.ActualVGNameOnTheNode, tpRequestedSize.Value()) + } + if err != nil { + r.log.Error(err, fmt.Sprintf("[reconcileLVGCreateFunc] unable to create thin-pool %s of the LVMVolumeGroup %s, cmd: %s", tp.Name, lvg.Name, cmd)) + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, "ThinPoolCreationFailed", fmt.Sprintf("unable to create thin-pool, err: %s", err.Error())) + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + } + + return true, err + } + } + r.log.Debug(fmt.Sprintf("[reconcileLVGCreateFunc] successfully created thin-pools for the LVMVolumeGroup %s", lvg.Name)) + } + + err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionTrue, internal.TypeVGConfigurationApplied, "Success", "all configuration has been applied") + if err != nil { + r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to add a condition %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, lvg.Name)) + return true, err + } + + return false, nil +} + +func (r *Reconciler) deleteLVMVolumeGroup(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup, currentNode string) error { + r.log.Debug(fmt.Sprintf(`[DeleteLVMVolumeGroup] Node "%s" does not belong to VG "%s". It will be removed from LVM resource, name "%s"'`, currentNode, lvg.Spec.ActualVGNameOnTheNode, lvg.Name)) + for i, node := range lvg.Status.Nodes { + if node.Name == currentNode { + // delete node + lvg.Status.Nodes = append(lvg.Status.Nodes[:i], lvg.Status.Nodes[i+1:]...) + r.log.Info(fmt.Sprintf(`[DeleteLVMVolumeGroup] deleted node "%s" from LVMVolumeGroup "%s"`, node.Name, lvg.Name)) + } + } + + // If current LVMVolumeGroup has no nodes left, delete it. 
+	if len(lvg.Status.Nodes) == 0 {
+		start := time.Now()
+		err := r.cl.Delete(ctx, lvg)
+		r.metrics.APIMethodsDuration(Name, "delete").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+		r.metrics.APIMethodsExecutionCount(Name, "delete").Inc()
+		if err != nil {
+			r.metrics.APIMethodsErrors(Name, "delete").Inc()
+			return err
+		}
+		r.log.Info(fmt.Sprintf("[DeleteLVMVolumeGroup] the LVMVolumeGroup %s deleted", lvg.Name))
+	}
+
+	return nil
+}
+
+func checkIfVGExist(vgName string, vgs []internal.VGData) bool {
+	for _, vg := range vgs {
+		if vg.VGName == vgName {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (r *Reconciler) shouldUpdateLVGLabels(lvg *v1alpha1.LVMVolumeGroup, labelKey, labelValue string) bool {
+	if lvg.Labels == nil {
+		r.log.Debug(fmt.Sprintf("[shouldUpdateLVGLabels] the LVMVolumeGroup %s has no labels.", lvg.Name))
+		return true
+	}
+
+	val, exist := lvg.Labels[labelKey]
+	if !exist {
+		r.log.Debug(fmt.Sprintf("[shouldUpdateLVGLabels] the LVMVolumeGroup %s has no label %s.", lvg.Name, labelKey))
+		return true
+	}
+
+	if val != labelValue {
+		r.log.Debug(fmt.Sprintf("[shouldUpdateLVGLabels] the LVMVolumeGroup %s has label %s but the value is incorrect - %s (should be %s)", lvg.Name, labelKey, val, labelValue))
+		return true
+	}
+
+	return false
+}
+
+func (r *Reconciler) shouldLVGWatcherReconcileUpdateEvent(oldLVG, newLVG *v1alpha1.LVMVolumeGroup) bool {
+	if newLVG.DeletionTimestamp != nil {
+		r.log.Debug(fmt.Sprintf("[shouldLVGWatcherReconcileUpdateEvent] update event should be reconciled as the LVMVolumeGroup %s has deletionTimestamp", newLVG.Name))
+		return true
+	}
+
+	if _, exist := newLVG.Labels[internal.LVGUpdateTriggerLabel]; exist {
+		r.log.Debug(fmt.Sprintf("[shouldLVGWatcherReconcileUpdateEvent] update event should be reconciled as the LVMVolumeGroup %s has the label %s", newLVG.Name, internal.LVGUpdateTriggerLabel))
+		return true
+	}
+
+	if r.shouldUpdateLVGLabels(newLVG, LVGMetadateNameLabelKey, newLVG.Name) {
+		r.log.Debug(fmt.Sprintf("[shouldLVGWatcherReconcileUpdateEvent] update event should be reconciled as the LVMVolumeGroup's %s labels have been changed", newLVG.Name))
+		return true
+	}
+
+	if !reflect.DeepEqual(oldLVG.Spec, newLVG.Spec) {
+		r.log.Debug(fmt.Sprintf("[shouldLVGWatcherReconcileUpdateEvent] update event should be reconciled as the LVMVolumeGroup %s configuration has been changed", newLVG.Name))
+		return true
+	}
+
+	for _, c := range newLVG.Status.Conditions {
+		if c.Type == internal.TypeVGConfigurationApplied {
+			if c.Reason == internal.ReasonUpdating || c.Reason == internal.ReasonCreating {
+				r.log.Debug(fmt.Sprintf("[shouldLVGWatcherReconcileUpdateEvent] update event should not be reconciled as the LVMVolumeGroup %s reconciliation is still in progress", newLVG.Name))
+				return false
+			}
+		}
+	}
+
+	for _, n := range newLVG.Status.Nodes {
+		for _, d := range n.Devices {
+			if !utils.AreSizesEqualWithinDelta(d.PVSize, d.DevSize, internal.ResizeDelta) {
+				r.log.Debug(fmt.Sprintf("[shouldLVGWatcherReconcileUpdateEvent] update event should be reconciled as the LVMVolumeGroup %s PV size is different to device size", newLVG.Name))
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+func shouldReconcileLVGByDeleteFunc(lvg *v1alpha1.LVMVolumeGroup) bool {
+	return lvg.DeletionTimestamp != nil
+}
+
+func (r *Reconciler) updateLVGConditionIfNeeded(
+	ctx context.Context,
+	lvg *v1alpha1.LVMVolumeGroup,
+	status v1.ConditionStatus,
+	conType, reason, message string,
+) error {
+	exist := false
+	index := 0
+	newCondition := v1.Condition{
+		Type:
conType, + Status: status, + ObservedGeneration: lvg.Generation, + LastTransitionTime: v1.NewTime(time.Now()), + Reason: reason, + Message: message, + } + + if lvg.Status.Conditions == nil { + r.log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] the LVMVolumeGroup %s conditions is nil. Initialize them", lvg.Name)) + lvg.Status.Conditions = make([]v1.Condition, 0, 5) + } + + if len(lvg.Status.Conditions) > 0 { + r.log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] there are some conditions in the LVMVolumeGroup %s. Tries to find a condition %s", lvg.Name, conType)) + for i, c := range lvg.Status.Conditions { + if c.Type == conType { + if checkIfEqualConditions(c, newCondition) { + log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] no need to update condition %s in the LVMVolumeGroup %s as new and old condition states are the same", conType, lvg.Name)) + return nil + } + + index = i + exist = true + r.log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] a condition %s was found in the LVMVolumeGroup %s at the index %d", conType, lvg.Name, i)) + } + } + + if !exist { + r.log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] a condition %s was not found. Append it in the end of the LVMVolumeGroup %s conditions", conType, lvg.Name)) + lvg.Status.Conditions = append(lvg.Status.Conditions, newCondition) + } else { + r.log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] insert the condition %s status %s reason %s message %s at index %d of the LVMVolumeGroup %s conditions", conType, status, reason, message, index, lvg.Name)) + lvg.Status.Conditions[index] = newCondition + } + } else { + r.log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] no conditions were found in the LVMVolumeGroup %s. Append the condition %s in the end", lvg.Name, conType)) + lvg.Status.Conditions = append(lvg.Status.Conditions, newCondition) + } + + r.log.Debug(fmt.Sprintf("[updateLVGConditionIfNeeded] tries to update the condition type %s status %s reason %s message %s of the LVMVolumeGroup %s", conType, status, reason, message, lvg.Name)) + return r.cl.Status().Update(ctx, lvg) +} + +func checkIfEqualConditions(first, second v1.Condition) bool { + return first.Type == second.Type && + first.Status == second.Status && + first.Reason == second.Reason && + first.Message == second.Message && + first.ObservedGeneration == second.ObservedGeneration +} + +func (r *Reconciler) addLVGFinalizerIfNotExist(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup) (bool, error) { + if slices.Contains(lvg.Finalizers, internal.SdsNodeConfiguratorFinalizer) { + return false, nil + } + + lvg.Finalizers = append(lvg.Finalizers, internal.SdsNodeConfiguratorFinalizer) + err := r.cl.Update(ctx, lvg) + if err != nil { + return false, err + } + + return true, nil +} + +func (r *Reconciler) syncThinPoolsAllocationLimit(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup) error { + updated := false + + tpSpecLimits := make(map[string]string, len(lvg.Spec.ThinPools)) + for _, tp := range lvg.Spec.ThinPools { + tpSpecLimits[tp.Name] = tp.AllocationLimit + } + + var ( + space resource.Quantity + err error + ) + for i := range lvg.Status.ThinPools { + if specLimits, matched := tpSpecLimits[lvg.Status.ThinPools[i].Name]; matched { + if lvg.Status.ThinPools[i].AllocationLimit != specLimits { + r.log.Debug(fmt.Sprintf("[syncThinPoolsAllocationLimit] thin-pool %s status AllocationLimit: %s of the LVMVolumeGroup %s should be updated by spec one: %s", lvg.Status.ThinPools[i].Name, lvg.Status.ThinPools[i].AllocationLimit, lvg.Name, specLimits)) + updated = true + 
lvg.Status.ThinPools[i].AllocationLimit = specLimits + + space, err = getThinPoolAvailableSpace(lvg.Status.ThinPools[i].ActualSize, lvg.Status.ThinPools[i].AllocatedSize, specLimits) + if err != nil { + r.log.Error(err, fmt.Sprintf("[syncThinPoolsAllocationLimit] unable to get thin pool %s available space", lvg.Status.ThinPools[i].Name)) + return err + } + r.log.Debug(fmt.Sprintf("[syncThinPoolsAllocationLimit] successfully got a new available space %s of the thin-pool %s", space.String(), lvg.Status.ThinPools[i].Name)) + lvg.Status.ThinPools[i].AvailableSpace = space + } + } else { + r.log.Debug(fmt.Sprintf("[syncThinPoolsAllocationLimit] status thin-pool %s of the LVMVolumeGroup %s was not found as used in spec", lvg.Status.ThinPools[i].Name, lvg.Name)) + } + } + + if updated { + fmt.Printf("%+v", lvg.Status.ThinPools) + r.log.Debug(fmt.Sprintf("[syncThinPoolsAllocationLimit] tries to update the LVMVolumeGroup %s", lvg.Name)) + err = r.cl.Status().Update(ctx, lvg) + if err != nil { + return err + } + r.log.Debug(fmt.Sprintf("[syncThinPoolsAllocationLimit] successfully updated the LVMVolumeGroup %s", lvg.Name)) + } else { + r.log.Debug(fmt.Sprintf("[syncThinPoolsAllocationLimit] every status thin-pool AllocationLimit value is synced with spec one for the LVMVolumeGroup %s", lvg.Name)) + } + + return nil +} + +func validateSpecBlockDevices(lvg *v1alpha1.LVMVolumeGroup, blockDevices map[string]v1alpha1.BlockDevice) (bool, string) { + if len(blockDevices) == 0 { + return false, "none of specified BlockDevices were found" + } + + if len(lvg.Status.Nodes) > 0 { + lostBdNames := make([]string, 0, len(lvg.Status.Nodes[0].Devices)) + for _, n := range lvg.Status.Nodes { + for _, d := range n.Devices { + if _, found := blockDevices[d.BlockDevice]; !found { + lostBdNames = append(lostBdNames, d.BlockDevice) + } + } + } + + // that means some of the used BlockDevices no longer match the blockDeviceSelector + if len(lostBdNames) > 0 { + return false, fmt.Sprintf("these BlockDevices no longer match the blockDeviceSelector: %s", strings.Join(lostBdNames, ",")) + } + } + + for _, me := range lvg.Spec.BlockDeviceSelector.MatchExpressions { + if me.Key == internal.MetadataNameLabelKey { + if len(me.Values) != len(blockDevices) { + missedBds := make([]string, 0, len(me.Values)) + for _, bdName := range me.Values { + if _, exist := blockDevices[bdName]; !exist { + missedBds = append(missedBds, bdName) + } + } + + return false, fmt.Sprintf("unable to find specified BlockDevices: %s", strings.Join(missedBds, ",")) + } + } + } + + bdFromOtherNode := make([]string, 0, len(blockDevices)) + for _, bd := range blockDevices { + if bd.Status.NodeName != lvg.Spec.Local.NodeName { + bdFromOtherNode = append(bdFromOtherNode, bd.Name) + } + } + + if len(bdFromOtherNode) != 0 { + return false, fmt.Sprintf("block devices %s have different node names from LVMVolumeGroup Local.NodeName", strings.Join(bdFromOtherNode, ",")) + } + + return true, "" +} + +func (r *Reconciler) deleteLVGIfNeeded(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup) (bool, error) { + if lvg.DeletionTimestamp == nil { + return false, nil + } + + vgs, _ := r.sdsCache.GetVGs() + if !checkIfVGExist(lvg.Spec.ActualVGNameOnTheNode, vgs) { + r.log.Info(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] VG %s was not yet created for the LVMVolumeGroup %s and the resource is marked as deleting. 
Delete the resource", lvg.Spec.ActualVGNameOnTheNode, lvg.Name))
+		removed, err := r.removeLVGFinalizerIfExist(ctx, lvg)
+		if err != nil {
+			r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to remove the finalizer %s from the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name))
+			return false, err
+		}
+
+		if removed {
+			r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] successfully removed the finalizer %s from the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name))
+		} else {
+			r.log.Debug(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] no need to remove the finalizer %s from the LVMVolumeGroup %s", internal.SdsNodeConfiguratorFinalizer, lvg.Name))
+		}
+
+		err = r.deleteLVMVolumeGroup(ctx, lvg, r.opts.NodeName)
+		if err != nil {
+			r.log.Error(err, fmt.Sprintf("[RunLVMVolumeGroupWatcherController] unable to delete the LVMVolumeGroup %s", lvg.Name))
+			return false, err
+		}
+		r.log.Info(fmt.Sprintf("[RunLVMVolumeGroupWatcherController] successfully deleted the LVMVolumeGroup %s", lvg.Name))
+		return true, nil
+	}
+	return false, nil
+}
+
+func checkIfLVGBelongsToNode(lvg *v1alpha1.LVMVolumeGroup, nodeName string) bool {
+	return lvg.Spec.Local.NodeName == nodeName
+}
+
+func extractPathsFromBlockDevices(targetDevices []string, blockDevices map[string]v1alpha1.BlockDevice) []string {
+	var paths []string
+	if len(targetDevices) > 0 {
+		paths = make([]string, 0, len(targetDevices))
+		for _, bdName := range targetDevices {
+			bd := blockDevices[bdName]
+			paths = append(paths, bd.Status.Path)
+		}
+	} else {
+		paths = make([]string, 0, len(blockDevices))
+		for _, bd := range blockDevices {
+			paths = append(paths, bd.Status.Path)
+		}
+	}
+
+	return paths
+}
+
+func getRequestedSizeFromString(size string, targetSpace resource.Quantity) (resource.Quantity, error) {
+	switch isPercentSize(size) {
+	case true:
+		strPercent := strings.Split(size, "%")[0]
+		percent, err := strconv.Atoi(strPercent)
+		if err != nil {
+			return resource.Quantity{}, err
+		}
+		lvSize := targetSpace.Value() * int64(percent) / 100
+		return *resource.NewQuantity(lvSize, resource.BinarySI), nil
+	case false:
+		return resource.ParseQuantity(size)
+	}
+
+	return resource.Quantity{}, nil
+}
+
+func countVGSizeByBlockDevices(blockDevices map[string]v1alpha1.BlockDevice) resource.Quantity {
+	var totalVGSize int64
+	for _, bd := range blockDevices {
+		totalVGSize += bd.Status.Size.Value()
+	}
+	return *resource.NewQuantity(totalVGSize, resource.BinarySI)
+}
+
+func (r *Reconciler) validateLVGForCreateFunc(
+	lvg *v1alpha1.LVMVolumeGroup,
+	blockDevices map[string]v1alpha1.BlockDevice,
+) (bool, string) {
+	reason := strings.Builder{}
+
+	r.log.Debug(fmt.Sprintf("[validateLVGForCreateFunc] check if every selected BlockDevice of the LVMVolumeGroup %s is consumable", lvg.Name))
+	// totalVGSize is needed to check whether there is enough space for the requested thin-pools
+	totalVGSize := countVGSizeByBlockDevices(blockDevices)
+	for _, bd := range blockDevices {
+		if !bd.Status.Consumable {
+			r.log.Warning(fmt.Sprintf("[validateLVGForCreateFunc] BlockDevice %s is not consumable", bd.Name))
+			r.log.Trace(fmt.Sprintf("[validateLVGForCreateFunc] BlockDevice name: %s, status: %+v", bd.Name, bd.Status))
+			reason.WriteString(fmt.Sprintf("BlockDevice %s is not consumable. 
", bd.Name)) + } + } + + if reason.Len() == 0 { + r.log.Debug(fmt.Sprintf("[validateLVGForCreateFunc] all BlockDevices of the LVMVolumeGroup %s are consumable", lvg.Name)) + } + + if lvg.Spec.ThinPools != nil { + r.log.Debug(fmt.Sprintf("[validateLVGForCreateFunc] the LVMVolumeGroup %s has thin-pools. Validate if VG size has enough space for the thin-pools", lvg.Name)) + r.log.Trace(fmt.Sprintf("[validateLVGForCreateFunc] the LVMVolumeGroup %s has thin-pools %v", lvg.Name, lvg.Spec.ThinPools)) + r.log.Trace(fmt.Sprintf("[validateLVGForCreateFunc] total LVMVolumeGroup %s size: %s", lvg.Name, totalVGSize.String())) + + var totalThinPoolSize int64 + for _, tp := range lvg.Spec.ThinPools { + tpRequestedSize, err := getRequestedSizeFromString(tp.Size, totalVGSize) + if err != nil { + reason.WriteString(err.Error()) + continue + } + + if tpRequestedSize.Value() == 0 { + reason.WriteString(fmt.Sprintf("Thin-pool %s has zero size. ", tp.Name)) + continue + } + + // means a user want a thin-pool with 100%FREE size + if utils.AreSizesEqualWithinDelta(tpRequestedSize, totalVGSize, internal.ResizeDelta) { + if len(lvg.Spec.ThinPools) > 1 { + reason.WriteString(fmt.Sprintf("Thin-pool %s requested size of full VG space, but there is any other thin-pool. ", tp.Name)) + } + } + + totalThinPoolSize += tpRequestedSize.Value() + } + r.log.Trace(fmt.Sprintf("[validateLVGForCreateFunc] LVMVolumeGroup %s thin-pools requested space: %d", lvg.Name, totalThinPoolSize)) + + if totalThinPoolSize != totalVGSize.Value() && totalThinPoolSize+internal.ResizeDelta.Value() >= totalVGSize.Value() { + r.log.Trace(fmt.Sprintf("[validateLVGForCreateFunc] total thin pool size: %s, total vg size: %s", resource.NewQuantity(totalThinPoolSize, resource.BinarySI).String(), totalVGSize.String())) + r.log.Warning(fmt.Sprintf("[validateLVGForCreateFunc] requested thin pool size is more than VG total size for the LVMVolumeGroup %s", lvg.Name)) + reason.WriteString(fmt.Sprintf("Required space for thin-pools %d is more than VG size %d.", totalThinPoolSize, totalVGSize.Value())) + } + } + + if reason.Len() != 0 { + return false, reason.String() + } + + return true, "" +} + +func (r *Reconciler) validateLVGForUpdateFunc( + lvg *v1alpha1.LVMVolumeGroup, + blockDevices map[string]v1alpha1.BlockDevice, +) (bool, string) { + reason := strings.Builder{} + pvs, _ := r.sdsCache.GetPVs() + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] check if every new BlockDevice of the LVMVolumeGroup %s is comsumable", lvg.Name)) + actualPVPaths := make(map[string]struct{}, len(pvs)) + for _, pv := range pvs { + actualPVPaths[pv.PVName] = struct{}{} + } + + //TODO: add a check if BlockDevice size got less than PV size + + // Check if added BlockDevices are consumable + // additionBlockDeviceSpace value is needed to count if VG will have enough space for thin-pools + var additionBlockDeviceSpace int64 + for _, bd := range blockDevices { + if _, found := actualPVPaths[bd.Status.Path]; !found { + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] unable to find the PV %s for BlockDevice %s. Check if the BlockDevice is already used", bd.Status.Path, bd.Name)) + for _, n := range lvg.Status.Nodes { + for _, d := range n.Devices { + if d.BlockDevice == bd.Name { + r.log.Warning(fmt.Sprintf("[validateLVGForUpdateFunc] BlockDevice %s misses the PV %s. That might be because the corresponding device was removed from the node. 
Unable to validate BlockDevices", bd.Name, bd.Status.Path)) + reason.WriteString(fmt.Sprintf("BlockDevice %s misses the PV %s (that might be because the device was removed from the node). ", bd.Name, bd.Status.Path)) + } + + if reason.Len() == 0 { + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] BlockDevice %s does not miss a PV", d.BlockDevice)) + } + } + } + + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] PV %s for BlockDevice %s of the LVMVolumeGroup %s is not created yet, check if the BlockDevice is consumable", bd.Status.Path, bd.Name, lvg.Name)) + if reason.Len() > 0 { + r.log.Debug("[validateLVGForUpdateFunc] some BlockDevices misses its PVs, unable to check if they are consumable") + continue + } + + if !bd.Status.Consumable { + reason.WriteString(fmt.Sprintf("BlockDevice %s is not consumable. ", bd.Name)) + continue + } + + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] BlockDevice %s is consumable", bd.Name)) + additionBlockDeviceSpace += bd.Status.Size.Value() + } + } + + if lvg.Spec.ThinPools != nil { + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] the LVMVolumeGroup %s has thin-pools. Validate them", lvg.Name)) + actualThinPools := make(map[string]internal.LVData, len(lvg.Spec.ThinPools)) + for _, tp := range lvg.Spec.ThinPools { + lv := r.sdsCache.FindLV(lvg.Spec.ActualVGNameOnTheNode, tp.Name) + if lv != nil { + if !isThinPool(lv.Data) { + reason.WriteString(fmt.Sprintf("LV %s is already created on the node and it is not a thin-pool", lv.Data.LVName)) + continue + } + + actualThinPools[lv.Data.LVName] = lv.Data + } + } + + // check if added thin-pools has valid requested size + var ( + addingThinPoolSize int64 + hasFullThinPool = false + ) + + vg := r.sdsCache.FindVG(lvg.Spec.ActualVGNameOnTheNode) + if vg == nil { + reason.WriteString(fmt.Sprintf("Missed VG %s in the cache", lvg.Spec.ActualVGNameOnTheNode)) + return false, reason.String() + } + + newTotalVGSize := resource.NewQuantity(vg.VGSize.Value()+additionBlockDeviceSpace, resource.BinarySI) + for _, specTp := range lvg.Spec.ThinPools { + // might be a case when Thin-pool is already created, but is not shown in status + tpRequestedSize, err := getRequestedSizeFromString(specTp.Size, *newTotalVGSize) + if err != nil { + reason.WriteString(err.Error()) + continue + } + + if tpRequestedSize.Value() == 0 { + reason.WriteString(fmt.Sprintf("Thin-pool %s has zero size. ", specTp.Name)) + continue + } + + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] the LVMVolumeGroup %s thin-pool %s requested size %s, Status VG size %s", lvg.Name, specTp.Name, tpRequestedSize.String(), lvg.Status.VGSize.String())) + switch utils.AreSizesEqualWithinDelta(tpRequestedSize, *newTotalVGSize, internal.ResizeDelta) { + // means a user wants 100% of VG space + case true: + hasFullThinPool = true + if len(lvg.Spec.ThinPools) > 1 { + // as if a user wants thin-pool with 100%VG size, there might be only one thin-pool + reason.WriteString(fmt.Sprintf("Thin-pool %s requests size of full VG space, but there are any other thin-pools. 
", specTp.Name)) + } + case false: + if actualThinPool, created := actualThinPools[specTp.Name]; !created { + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] thin-pool %s of the LVMVolumeGroup %s is not yet created, adds its requested size", specTp.Name, lvg.Name)) + addingThinPoolSize += tpRequestedSize.Value() + } else { + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] thin-pool %s of the LVMVolumeGroup %s is already created, check its requested size", specTp.Name, lvg.Name)) + if tpRequestedSize.Value()+internal.ResizeDelta.Value() < actualThinPool.LVSize.Value() { + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] the LVMVolumeGroup %s Spec.ThinPool %s size %s is less than Status one: %s", lvg.Name, specTp.Name, tpRequestedSize.String(), actualThinPool.LVSize.String())) + reason.WriteString(fmt.Sprintf("Requested Spec.ThinPool %s size %s is less than actual one %s. ", specTp.Name, tpRequestedSize.String(), actualThinPool.LVSize.String())) + continue + } + + thinPoolSizeDiff := tpRequestedSize.Value() - actualThinPool.LVSize.Value() + if thinPoolSizeDiff > internal.ResizeDelta.Value() { + r.log.Debug(fmt.Sprintf("[validateLVGForUpdateFunc] the LVMVolumeGroup %s Spec.ThinPool %s size %s more than Status one: %s", lvg.Name, specTp.Name, tpRequestedSize.String(), actualThinPool.LVSize.String())) + addingThinPoolSize += thinPoolSizeDiff + } + } + } + } + + if !hasFullThinPool { + allocatedSize := getVGAllocatedSize(*vg) + totalFreeSpace := newTotalVGSize.Value() - allocatedSize.Value() + r.log.Trace(fmt.Sprintf("[validateLVGForUpdateFunc] new LVMVolumeGroup %s thin-pools requested %d size, additional BlockDevices space %d, total: %d", lvg.Name, addingThinPoolSize, additionBlockDeviceSpace, totalFreeSpace)) + if addingThinPoolSize != 0 && addingThinPoolSize+internal.ResizeDelta.Value() > totalFreeSpace { + reason.WriteString("Added thin-pools requested sizes are more than allowed free space in VG.") + } + } + } + + if reason.Len() != 0 { + return false, reason.String() + } + + return true, "" +} + +func (r *Reconciler) identifyLVGReconcileFunc(lvg *v1alpha1.LVMVolumeGroup) reconcileType { + if r.shouldReconcileLVGByCreateFunc(lvg) { + return CreateReconcile + } + + if r.shouldReconcileLVGByUpdateFunc(lvg) { + return UpdateReconcile + } + + if r.shouldReconcileLVGByDeleteFunc(lvg) { + return DeleteReconcile + } + + return "none" +} + +func (r *Reconciler) shouldReconcileLVGByCreateFunc(lvg *v1alpha1.LVMVolumeGroup) bool { + if lvg.DeletionTimestamp != nil { + return false + } + + vg := r.sdsCache.FindVG(lvg.Spec.ActualVGNameOnTheNode) + return vg == nil +} + +func (r *Reconciler) shouldReconcileLVGByUpdateFunc(lvg *v1alpha1.LVMVolumeGroup) bool { + if lvg.DeletionTimestamp != nil { + return false + } + + vg := r.sdsCache.FindVG(lvg.Spec.ActualVGNameOnTheNode) + return vg != nil +} + +func (r *Reconciler) reconcileThinPoolsIfNeeded( + ctx context.Context, + lvg *v1alpha1.LVMVolumeGroup, + vg internal.VGData, + lvs []internal.LVData, +) error { + actualThinPools := make(map[string]internal.LVData, len(lvs)) + for _, lv := range lvs { + if string(lv.LVAttr[0]) == "t" { + actualThinPools[lv.LVName] = lv + } + } + + errs := strings.Builder{} + for _, specTp := range lvg.Spec.ThinPools { + tpRequestedSize, err := getRequestedSizeFromString(specTp.Size, lvg.Status.VGSize) + if err != nil { + r.log.Error(err, fmt.Sprintf("[ReconcileThinPoolsIfNeeded] unable to get requested thin-pool %s size of the LVMVolumeGroup %s", specTp.Name, lvg.Name)) + return err + } + + if actualTp, exist := 
actualThinPools[specTp.Name]; !exist {
+			r.log.Debug(fmt.Sprintf("[ReconcileThinPoolsIfNeeded] thin-pool %s of the LVMVolumeGroup %s is not created yet. Create it", specTp.Name, lvg.Name))
+			if checkIfConditionIsTrue(lvg, internal.TypeVGConfigurationApplied) {
+				err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonUpdating, "trying to apply the configuration")
+				if err != nil {
+					r.log.Error(err, fmt.Sprintf("[ReconcileThinPoolsIfNeeded] unable to add the condition %s status False reason %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, internal.ReasonUpdating, lvg.Name))
+					return err
+				}
+			}
+
+			var cmd string
+			start := time.Now()
+			if utils.AreSizesEqualWithinDelta(tpRequestedSize, lvg.Status.VGSize, internal.ResizeDelta) {
+				r.log.Debug(fmt.Sprintf("[ReconcileThinPoolsIfNeeded] thin-pool %s of the LVMVolumeGroup %s will be created with size 100FREE", specTp.Name, lvg.Name))
+				cmd, err = utils.CreateThinPoolFullVGSpace(specTp.Name, vg.VGName)
+			} else {
+				r.log.Debug(fmt.Sprintf("[ReconcileThinPoolsIfNeeded] thin-pool %s of the LVMVolumeGroup %s will be created with size %s", specTp.Name, lvg.Name, tpRequestedSize.String()))
+				cmd, err = utils.CreateThinPool(specTp.Name, vg.VGName, tpRequestedSize.Value())
+			}
+			r.metrics.UtilsCommandsDuration(Name, "lvcreate").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+			r.metrics.UtilsCommandsExecutionCount(Name, "lvcreate").Inc()
+			if err != nil {
+				r.metrics.UtilsCommandsErrorsCount(Name, "lvcreate").Inc()
+				r.log.Error(err, fmt.Sprintf("[ReconcileThinPoolsIfNeeded] unable to create thin-pool %s of the LVMVolumeGroup %s, cmd: %s", specTp.Name, lvg.Name, cmd))
+				errs.WriteString(fmt.Sprintf("unable to create thin-pool %s, err: %s. ", specTp.Name, err.Error()))
+				continue
+			}
+
+			r.log.Info(fmt.Sprintf("[ReconcileThinPoolsIfNeeded] thin-pool %s of the LVMVolumeGroup %s has been successfully created", specTp.Name, lvg.Name))
+		} else {
+			// the thin-pool already exists; extend it if the requested size has grown
+			if utils.AreSizesEqualWithinDelta(tpRequestedSize, actualTp.LVSize, internal.ResizeDelta) {
+				r.log.Debug(fmt.Sprintf("[ReconcileThinPoolsIfNeeded] the LVMVolumeGroup %s requested thin-pool size %s is equal to the actual one", lvg.Name, tpRequestedSize.String()))
+				continue
+			}
+
+			r.log.Debug(fmt.Sprintf("[ReconcileThinPoolsIfNeeded] the LVMVolumeGroup %s requested thin-pool size %s is more than the actual one. Resize it", lvg.Name, tpRequestedSize.String()))
+			if checkIfConditionIsTrue(lvg, internal.TypeVGConfigurationApplied) {
+				err = r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonUpdating, "trying to apply the configuration")
+				if err != nil {
+					r.log.Error(err, fmt.Sprintf("[ReconcileThinPoolsIfNeeded] unable to add the condition %s status False reason %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, internal.ReasonUpdating, lvg.Name))
+					return err
+				}
+			}
+			err = r.extendThinPool(lvg, specTp)
+			if err != nil {
+				r.log.Error(err, fmt.Sprintf("[ReconcileThinPoolsIfNeeded] unable to resize thin-pool %s of the LVMVolumeGroup %s", specTp.Name, lvg.Name))
+				errs.WriteString(fmt.Sprintf("unable to resize thin-pool %s, err: %s. ", specTp.Name, err.Error()))
+				continue
+			}
+		}
+	}
+
+	if errs.Len() != 0 {
+		return errors.New(errs.String())
+	}
+
+	return nil
+}
+
+func (r *Reconciler) resizePVIfNeeded(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup) error {
+	if len(lvg.Status.Nodes) == 0 {
+		r.log.Warning(fmt.Sprintf("[ResizePVIfNeeded] the LVMVolumeGroup %s nodes are empty. Wait for the next update", lvg.Name))
+		return nil
+	}
+
+	errs := strings.Builder{}
+	for _, n := range lvg.Status.Nodes {
+		for _, d := range n.Devices {
+			if d.DevSize.Value()-d.PVSize.Value() > internal.ResizeDelta.Value() {
+				if checkIfConditionIsTrue(lvg, internal.TypeVGConfigurationApplied) {
+					err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonUpdating, "trying to apply the configuration")
+					if err != nil {
+						r.log.Error(err, fmt.Sprintf("[ResizePVIfNeeded] unable to add the condition %s status False reason %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, internal.ReasonUpdating, lvg.Name))
+						return err
+					}
+				}
+
+				r.log.Debug(fmt.Sprintf("[ResizePVIfNeeded] the LVMVolumeGroup %s BlockDevice %s PVSize is less than actual device size. Resize PV", lvg.Name, d.BlockDevice))
+
+				start := time.Now()
+				cmd, err := utils.ResizePV(d.Path)
+				r.metrics.UtilsCommandsDuration(Name, "pvresize").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+				r.metrics.UtilsCommandsExecutionCount(Name, "pvresize").Inc()
+				if err != nil {
+					r.metrics.UtilsCommandsErrorsCount(Name, "pvresize").Inc()
+					r.log.Error(err, fmt.Sprintf("[ResizePVIfNeeded] unable to resize PV %s of BlockDevice %s of LVMVolumeGroup %s, cmd: %s", d.Path, d.BlockDevice, lvg.Name, cmd))
+					errs.WriteString(fmt.Sprintf("unable to resize PV %s, err: %s. ", d.Path, err.Error()))
+					continue
+				}
+
+				r.log.Info(fmt.Sprintf("[ResizePVIfNeeded] successfully resized PV %s of BlockDevice %s of LVMVolumeGroup %s", d.Path, d.BlockDevice, lvg.Name))
+			} else {
+				r.log.Debug(fmt.Sprintf("[ResizePVIfNeeded] no need to resize PV %s of BlockDevice %s of the LVMVolumeGroup %s", d.Path, d.BlockDevice, lvg.Name))
+			}
+		}
+	}
+
+	if errs.Len() != 0 {
+		return errors.New(errs.String())
+	}
+
+	return nil
+}
+
+func (r *Reconciler) extendVGIfNeeded(
+	ctx context.Context,
+	lvg *v1alpha1.LVMVolumeGroup,
+	vg internal.VGData,
+	pvs []internal.PVData,
+	blockDevices map[string]v1alpha1.BlockDevice,
+) error {
+	for _, n := range lvg.Status.Nodes {
+		for _, d := range n.Devices {
+			r.log.Trace(fmt.Sprintf("[ExtendVGIfNeeded] the LVMVolumeGroup %s status block device: %s", lvg.Name, d.BlockDevice))
+		}
+	}
+
+	pvsMap := make(map[string]struct{}, len(pvs))
+	for _, pv := range pvs {
+		pvsMap[pv.PVName] = struct{}{}
+	}
+
+	devicesToExtend := make([]string, 0, len(blockDevices))
+	for _, bd := range blockDevices {
+		if _, exist := pvsMap[bd.Status.Path]; !exist {
+			r.log.Debug(fmt.Sprintf("[ExtendVGIfNeeded] the BlockDevice %s of LVMVolumeGroup %s Spec is not counted as used", bd.Name, lvg.Name))
+			devicesToExtend = append(devicesToExtend, bd.Name)
+		}
+	}
+
+	if len(devicesToExtend) == 0 {
+		r.log.Debug(fmt.Sprintf("[ExtendVGIfNeeded] VG %s of the LVMVolumeGroup %s should not be extended", vg.VGName, lvg.Name))
+		return nil
+	}
+
+	if checkIfConditionIsTrue(lvg, internal.TypeVGConfigurationApplied) {
+		err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonUpdating, "trying to apply the configuration")
+		if err != nil {
+			r.log.Error(err, fmt.Sprintf("[ExtendVGIfNeeded] 
unable to add the condition %s status False reason %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, internal.ReasonUpdating, lvg.Name))
+			return err
+		}
+	}
+
+	r.log.Debug(fmt.Sprintf("[ExtendVGIfNeeded] VG %s should be extended as some BlockDevices were added to the Spec field of the LVMVolumeGroup %s", vg.VGName, lvg.Name))
+	paths := extractPathsFromBlockDevices(devicesToExtend, blockDevices)
+	err := r.extendVGComplex(paths, vg.VGName)
+	if err != nil {
+		r.log.Error(err, fmt.Sprintf("[ExtendVGIfNeeded] unable to extend VG %s of the LVMVolumeGroup %s", vg.VGName, lvg.Name))
+		return err
+	}
+	r.log.Info(fmt.Sprintf("[ExtendVGIfNeeded] VG %s of the LVMVolumeGroup %s was extended", vg.VGName, lvg.Name))
+
+	return nil
+}
+
+func tryGetVG(sdsCache *cache.Cache, vgName string) (bool, internal.VGData) {
+	vgs, _ := sdsCache.GetVGs()
+	for _, vg := range vgs {
+		if vg.VGName == vgName {
+			return true, vg
+		}
+	}
+
+	return false, internal.VGData{}
+}
+
+func (r *Reconciler) removeLVGFinalizerIfExist(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup) (bool, error) {
+	if !slices.Contains(lvg.Finalizers, internal.SdsNodeConfiguratorFinalizer) {
+		return false, nil
+	}
+
+	for i := range lvg.Finalizers {
+		if lvg.Finalizers[i] == internal.SdsNodeConfiguratorFinalizer {
+			lvg.Finalizers = append(lvg.Finalizers[:i], lvg.Finalizers[i+1:]...)
+			break
+		}
+	}
+
+	err := r.cl.Update(ctx, lvg)
+	if err != nil {
+		return false, err
+	}
+
+	return true, nil
+}
+
+func (r *Reconciler) getLVForVG(vgName string) []string {
+	lvs, _ := r.sdsCache.GetLVs()
+	usedLVs := make([]string, 0, len(lvs))
+	for _, lv := range lvs {
+		if lv.VGName == vgName {
+			usedLVs = append(usedLVs, lv.LVName)
+		}
+	}
+
+	return usedLVs
+}
+
+func (r *Reconciler) getLVMVolumeGroup(ctx context.Context, name string) (*v1alpha1.LVMVolumeGroup, error) {
+	obj := &v1alpha1.LVMVolumeGroup{}
+	start := time.Now()
+	err := r.cl.Get(ctx, client.ObjectKey{
+		Name: name,
+	}, obj)
+	r.metrics.APIMethodsDuration(Name, "get").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+	r.metrics.APIMethodsExecutionCount(Name, "get").Inc()
+	if err != nil {
+		r.metrics.APIMethodsErrors(Name, "get").Inc()
+		return nil, err
+	}
+	return obj, nil
+}
+
+func (r *Reconciler) deleteVGIfExist(vgName string) error {
+	vgs, _ := r.sdsCache.GetVGs()
+	if !checkIfVGExist(vgName, vgs) {
+		r.log.Debug(fmt.Sprintf("[DeleteVGIfExist] no VG %s found, nothing to delete", vgName))
+		return nil
+	}
+
+	pvs, _ := r.sdsCache.GetPVs()
+	if len(pvs) == 0 {
+		err := errors.New("no PVs found")
+		r.log.Error(err, fmt.Sprintf("[DeleteVGIfExist] no PVs were found while deleting VG %s", vgName))
+		return err
+	}
+
+	start := time.Now()
+	command, err := utils.RemoveVG(vgName)
+	r.metrics.UtilsCommandsDuration(Name, "vgremove").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+	r.metrics.UtilsCommandsExecutionCount(Name, "vgremove").Inc()
+	r.log.Debug(command)
+	if err != nil {
+		r.metrics.UtilsCommandsErrorsCount(Name, "vgremove").Inc()
+		r.log.Error(err, "RemoveVG "+command)
+		return err
+	}
+	r.log.Debug(fmt.Sprintf("[DeleteVGIfExist] VG %s was successfully deleted from the node", vgName))
+	var pvsToRemove []string
+	for _, pv := range pvs {
+		if pv.VGName == vgName {
+			pvsToRemove = append(pvsToRemove, pv.PVName)
+		}
+	}
+
+	start = time.Now()
+	command, err = utils.RemovePV(pvsToRemove)
+	r.metrics.UtilsCommandsDuration(Name, "pvremove").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+	r.metrics.UtilsCommandsExecutionCount(Name, "pvremove").Inc()
+	r.log.Debug(command)
+	if err != nil {
+		r.metrics.UtilsCommandsErrorsCount(Name, "pvremove").Inc()
+		r.log.Error(err, "RemovePV "+command)
+		return err
+	}
+	r.log.Debug(fmt.Sprintf("[DeleteVGIfExist] successfully deleted PVs of VG %s from the node", vgName))
+
+	return nil
+}
+
+func (r *Reconciler) extendVGComplex(extendPVs []string, vgName string) error {
+	for _, pvPath := range extendPVs {
+		start := time.Now()
+		command, err := utils.CreatePV(pvPath)
+		r.metrics.UtilsCommandsDuration(Name, "pvcreate").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+		r.metrics.UtilsCommandsExecutionCount(Name, "pvcreate").Inc()
+		r.log.Debug(command)
+		if err != nil {
+			r.metrics.UtilsCommandsErrorsCount(Name, "pvcreate").Inc()
+			r.log.Error(err, "CreatePV ")
+			return err
+		}
+	}
+
+	start := time.Now()
+	command, err := utils.ExtendVG(vgName, extendPVs)
+	r.metrics.UtilsCommandsDuration(Name, "vgextend").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+	r.metrics.UtilsCommandsExecutionCount(Name, "vgextend").Inc()
+	r.log.Debug(command)
+	if err != nil {
+		r.metrics.UtilsCommandsErrorsCount(Name, "vgextend").Inc()
+		r.log.Error(err, "ExtendVG ")
+		return err
+	}
+	return nil
+}
+
+func (r *Reconciler) createVGComplex(lvg *v1alpha1.LVMVolumeGroup, blockDevices map[string]v1alpha1.BlockDevice) error {
+	paths := extractPathsFromBlockDevices(nil, blockDevices)
+
+	r.log.Trace(fmt.Sprintf("[CreateVGComplex] LVMVolumeGroup %s devices paths %v", lvg.Name, paths))
+	for _, path := range paths {
+		start := time.Now()
+		command, err := utils.CreatePV(path)
+		r.metrics.UtilsCommandsDuration(Name, "pvcreate").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+		r.metrics.UtilsCommandsExecutionCount(Name, "pvcreate").Inc()
+		r.log.Debug(command)
+		if err != nil {
+			r.metrics.UtilsCommandsErrorsCount(Name, "pvcreate").Inc()
+			r.log.Error(err, fmt.Sprintf("[CreateVGComplex] unable to create PV by path %s", path))
+			return err
+		}
+	}
+
+	r.log.Debug(fmt.Sprintf("[CreateVGComplex] successfully created all PVs for the LVMVolumeGroup %s", lvg.Name))
+	r.log.Debug(fmt.Sprintf("[CreateVGComplex] the LVMVolumeGroup %s type is %s", lvg.Name, lvg.Spec.Type))
+	switch lvg.Spec.Type {
+	case Local:
+		start := time.Now()
+		cmd, err := utils.CreateVGLocal(lvg.Spec.ActualVGNameOnTheNode, lvg.Name, paths)
+		r.metrics.UtilsCommandsDuration(Name, "vgcreate").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+		r.metrics.UtilsCommandsExecutionCount(Name, "vgcreate").Inc()
+		r.log.Debug(cmd)
+		if err != nil {
+			r.metrics.UtilsCommandsErrorsCount(Name, "vgcreate").Inc()
+			r.log.Error(err, "error CreateVGLocal")
+			return err
+		}
+	case Shared:
+		start := time.Now()
+		cmd, err := utils.CreateVGShared(lvg.Spec.ActualVGNameOnTheNode, lvg.Name, paths)
+		r.metrics.UtilsCommandsDuration(Name, "vgcreate").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+		r.metrics.UtilsCommandsExecutionCount(Name, "vgcreate").Inc()
+		r.log.Debug(cmd)
+		if err != nil {
+			r.metrics.UtilsCommandsErrorsCount(Name, "vgcreate").Inc()
+			r.log.Error(err, "error CreateVGShared")
+			return err
+		}
+	}
+
+	r.log.Debug(fmt.Sprintf("[CreateVGComplex] successfully created VG %s of the LVMVolumeGroup %s", lvg.Spec.ActualVGNameOnTheNode, lvg.Name))
+
+	return nil
+}
+
+func (r *Reconciler) updateVGTagIfNeeded(
+	ctx context.Context,
+	lvg *v1alpha1.LVMVolumeGroup,
+	vg internal.VGData,
+) (bool, error) {
+	found, tagName := checkTag(vg.VGTags)
+	if found && lvg.Name != tagName {
+		if checkIfConditionIsTrue(lvg, internal.TypeVGConfigurationApplied) {
+			err := r.updateLVGConditionIfNeeded(ctx, lvg, v1.ConditionFalse, internal.TypeVGConfigurationApplied, internal.ReasonUpdating, "trying to apply the configuration")
+			if err != nil {
+				r.log.Error(err, fmt.Sprintf("[UpdateVGTagIfNeeded] unable to add the condition %s status False reason %s to the LVMVolumeGroup %s", internal.TypeVGConfigurationApplied, internal.ReasonUpdating, lvg.Name))
+				return false, err
+			}
+		}
+
+		start := time.Now()
+		cmd, err := utils.VGChangeDelTag(vg.VGName, fmt.Sprintf("%s=%s", LVMVolumeGroupTag, tagName))
+		r.metrics.UtilsCommandsDuration(Name, "vgchange").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+		r.metrics.UtilsCommandsExecutionCount(Name, "vgchange").Inc()
+		r.log.Debug(fmt.Sprintf("[UpdateVGTagIfNeeded] exec cmd: %s", cmd))
+		if err != nil {
+			r.log.Error(err, fmt.Sprintf("[UpdateVGTagIfNeeded] unable to delete LVMVolumeGroupTag: %s=%s, vg: %s", LVMVolumeGroupTag, tagName, vg.VGName))
+			r.metrics.UtilsCommandsErrorsCount(Name, "vgchange").Inc()
+			return false, err
+		}
+
+		start = time.Now()
+		cmd, err = utils.VGChangeAddTag(vg.VGName, fmt.Sprintf("%s=%s", LVMVolumeGroupTag, lvg.Name))
+		r.metrics.UtilsCommandsDuration(Name, "vgchange").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+		r.metrics.UtilsCommandsExecutionCount(Name, "vgchange").Inc()
+		r.log.Debug(fmt.Sprintf("[UpdateVGTagIfNeeded] exec cmd: %s", cmd))
+		if err != nil {
+			r.log.Error(err, fmt.Sprintf("[UpdateVGTagIfNeeded] unable to add LVMVolumeGroupTag: %s=%s, vg: %s", LVMVolumeGroupTag, lvg.Name, vg.VGName))
+			r.metrics.UtilsCommandsErrorsCount(Name, "vgchange").Inc()
+			return false, err
+		}
+
+		return true, nil
+	}
+
+	return false, nil
+}
+
+func (r *Reconciler) extendThinPool(lvg *v1alpha1.LVMVolumeGroup, specThinPool v1alpha1.LVMVolumeGroupThinPoolSpec) error {
+	volumeGroupFreeSpaceBytes := lvg.Status.VGSize.Value() - lvg.Status.AllocatedSize.Value()
+	tpRequestedSize, err := getRequestedSizeFromString(specThinPool.Size, lvg.Status.VGSize)
+	if err != nil {
+		return err
+	}
+
+	r.log.Trace(fmt.Sprintf("[ExtendThinPool] volumeGroupSize = %s", lvg.Status.VGSize.String()))
+	r.log.Trace(fmt.Sprintf("[ExtendThinPool] volumeGroupAllocatedSize = %s", lvg.Status.AllocatedSize.String()))
+	r.log.Trace(fmt.Sprintf("[ExtendThinPool] volumeGroupFreeSpaceBytes = %d", volumeGroupFreeSpaceBytes))
+
+	r.log.Info(fmt.Sprintf("[ExtendThinPool] start resizing thin pool: %s; with new size: %s", specThinPool.Name, tpRequestedSize.String()))
+
+	var cmd string
+	start := time.Now()
+	if utils.AreSizesEqualWithinDelta(tpRequestedSize, lvg.Status.VGSize, internal.ResizeDelta) {
+		r.log.Debug(fmt.Sprintf("[ExtendThinPool] thin-pool %s of the LVMVolumeGroup %s will be extended to size 100VG", specThinPool.Name, lvg.Name))
+		cmd, err = utils.ExtendLVFullVGSpace(lvg.Spec.ActualVGNameOnTheNode, specThinPool.Name)
+	} else {
+		r.log.Debug(fmt.Sprintf("[ExtendThinPool] thin-pool %s of the LVMVolumeGroup %s will be extended to size %s", specThinPool.Name, lvg.Name, tpRequestedSize.String()))
+		cmd, err = utils.ExtendLV(tpRequestedSize.Value(), lvg.Spec.ActualVGNameOnTheNode, specThinPool.Name)
+	}
+	r.metrics.UtilsCommandsDuration(Name, "lvextend").Observe(r.metrics.GetEstimatedTimeInSeconds(start))
+	r.metrics.UtilsCommandsExecutionCount(Name, "lvextend").Inc()
+	if err != nil {
+		r.metrics.UtilsCommandsErrorsCount(Name, "lvextend").Inc()
+		r.log.Error(err, fmt.Sprintf("[ExtendThinPool] unable to extend LV, name: %s, cmd: %s", specThinPool.Name, cmd))
+		return err
+	}
+
+	// the resized LV is expected to show up in the LVM cache on the next scan
return nil +} + +func (r *Reconciler) addLVGLabelIfNeeded(ctx context.Context, lvg *v1alpha1.LVMVolumeGroup, labelKey, labelValue string) (bool, error) { + if !r.shouldUpdateLVGLabels(lvg, labelKey, labelValue) { + return false, nil + } + + if lvg.Labels == nil { + lvg.Labels = make(map[string]string) + } + + lvg.Labels[labelKey] = labelValue + err := r.cl.Update(ctx, lvg) + if err != nil { + return false, err + } + + return true, nil +} diff --git a/images/agent/src/pkg/controller/lvm_logical_volume_snapshot_watcher.go b/images/agent/src/pkg/controller/lvm_logical_volume_snapshot_watcher.go new file mode 100644 index 00000000..8dea5c32 --- /dev/null +++ b/images/agent/src/pkg/controller/lvm_logical_volume_snapshot_watcher.go @@ -0,0 +1,490 @@ +package controller + +import ( + "agent/config" + "agent/internal" + "agent/pkg/cache" + "agent/pkg/logger" + "agent/pkg/monitoring" + "agent/pkg/utils" + "context" + "errors" + "fmt" + "reflect" + + "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/google/go-cmp/cmp" + k8serr "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +const ( + lvmLogicalVolumeSnapshotWatcherCtrlName = "lvm-logical-volume-snapshot-watcher-controller" +) + +func RunLVMLogicalVolumeSnapshotWatcherController( + mgr manager.Manager, + cfg config.Options, + log logger.Logger, + metrics monitoring.Metrics, + sdsCache *cache.Cache, +) (controller.Controller, error) { + cl := mgr.GetClient() + + c, err := controller.New(lvmLogicalVolumeSnapshotWatcherCtrlName, mgr, controller.Options{ + Reconciler: reconcile.Func(func(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] Reconciler starts reconciliation of the LVMLogicalVolume: %s", request.Name)) + + log.Debug(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] tries to get the LVMLogicalVolume %s", request.Name)) + llv := &v1alpha1.LVMLogicalVolume{} + err := cl.Get(ctx, request.NamespacedName, llv) + if err != nil { + if k8serr.IsNotFound(err) { + log.Debug(fmt.Sprintf("[ReconcileLVMLogicalVolume] LVMLogicalVolume %s not found. Object has probably been deleted", request.NamespacedName)) + return reconcile.Result{}, nil + } + return reconcile.Result{}, err + } + + lvg, err := getLVMVolumeGroup(ctx, cl, metrics, llv.Spec.LVMVolumeGroupName) + if err != nil { + if k8serr.IsNotFound(err) { + log.Error(err, fmt.Sprintf("[ReconcileLVMLogicalVolume] LVMVolumeGroup %s not found for LVMLogicalVolume %s. 
Retry in %s", llv.Spec.LVMVolumeGroupName, llv.Name, cfg.VolumeGroupScanIntervalSec.String())) + err = updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhaseFailed, fmt.Sprintf("LVMVolumeGroup %s not found", llv.Spec.LVMVolumeGroupName)) + if err != nil { + log.Error(err, fmt.Sprintf("[ReconcileLVMLogicalVolume] unable to update the LVMLogicalVolume %s", llv.Name)) + return reconcile.Result{}, err + } + + return reconcile.Result{ + RequeueAfter: cfg.VolumeGroupScanIntervalSec, + }, nil + } + + err = updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhaseFailed, fmt.Sprintf("Unable to get selected LVMVolumeGroup, err: %s", err.Error())) + if err != nil { + log.Error(err, fmt.Sprintf("[ReconcileLVMLogicalVolume] unable to update the LVMLogicalVolume %s", llv.Name)) + } + return reconcile.Result{}, err + } + + if !belongsToNode(lvg, cfg.NodeName) { + log.Info(fmt.Sprintf("[ReconcileLVMLogicalVolume] the LVMVolumeGroup %s of the LVMLogicalVolume %s does not belongs to the current node: %s. Reconciliation stopped", lvg.Name, llv.Name, cfg.NodeName)) + return reconcile.Result{}, nil + } + log.Info(fmt.Sprintf("[ReconcileLVMLogicalVolume] the LVMVolumeGroup %s of the LVMLogicalVolume %s belongs to the current node: %s. Reconciliation continues", lvg.Name, llv.Name, cfg.NodeName)) + + // this case prevents the unexpected behavior when the controller runs up with existing LVMLogicalVolumes + if vgs, _ := sdsCache.GetVGs(); len(vgs) == 0 { + log.Warning(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] unable to reconcile the request as no VG was found in the cache. Retry in %s", cfg.VolumeGroupScanIntervalSec.String())) + return reconcile.Result{RequeueAfter: cfg.VolumeGroupScanIntervalSec}, nil + } + + log.Debug(fmt.Sprintf("[ReconcileLVMLogicalVolume] tries to add the finalizer %s to the LVMLogicalVolume %s", internal.SdsNodeConfiguratorFinalizer, llv.Name)) + added, err := addLLVFinalizerIfNotExist(ctx, cl, log, metrics, llv) + if err != nil { + log.Error(err, fmt.Sprintf("[ReconcileLVMLogicalVolume] unable to update the LVMLogicalVolume %s", llv.Name)) + return reconcile.Result{}, err + } + if added { + log.Debug(fmt.Sprintf("[ReconcileLVMLogicalVolume] successfully added the finalizer %s to the LVMLogicalVolume %s", internal.SdsNodeConfiguratorFinalizer, llv.Name)) + } else { + log.Debug(fmt.Sprintf("[ReconcileLVMLogicalVolume] no need to add the finalizer %s to the LVMLogicalVolume %s", internal.SdsNodeConfiguratorFinalizer, llv.Name)) + } + + log.Info(fmt.Sprintf("[ReconcileLVMLogicalVolume] starts to validate the LVMLogicalVolume %s", llv.Name)) + valid, reason := validateLVMLogicalVolume(sdsCache, llv, lvg) + if !valid { + log.Warning(fmt.Sprintf("[ReconcileLVMLogicalVolume] the LVMLogicalVolume %s is not valid, reason: %s", llv.Name, reason)) + err = updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhaseFailed, reason) + if err != nil { + log.Error(err, fmt.Sprintf("[ReconcileLVMLogicalVolume] unable to update the LVMLogicalVolume %s", llv.Name)) + return reconcile.Result{}, err + } + + return reconcile.Result{}, nil + } + log.Info(fmt.Sprintf("[ReconcileLVMLogicalVolume] successfully validated the LVMLogicalVolume %s", llv.Name)) + + shouldRequeue, err := ReconcileLVMLogicalVolume(ctx, cl, log, metrics, sdsCache, llv, lvg) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] an error occurred while reconciling the LVMLogicalVolume: %s", request.Name)) + updErr := 
updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhaseFailed, err.Error()) + if updErr != nil { + log.Error(updErr, fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] unable to update the LVMLogicalVolume %s", llv.Name)) + return reconcile.Result{}, updErr + } + } + if shouldRequeue { + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] some issues were occurred while reconciliation the LVMLogicalVolume %s. Requeue the request in %s", request.Name, cfg.LLVRequeueIntervalSec.String())) + return reconcile.Result{RequeueAfter: cfg.LLVRequeueIntervalSec}, nil + } + + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] successfully ended reconciliation of the LVMLogicalVolume %s", request.Name)) + return reconcile.Result{}, nil + }), + MaxConcurrentReconciles: 10, + }) + + if err != nil { + log.Error(err, "[RunLVMLogicalVolumeWatcherController] unable to create controller") + return nil, err + } + + err = c.Watch(source.Kind(mgr.GetCache(), &v1alpha1.LVMLogicalVolume{}, handler.TypedFuncs[*v1alpha1.LVMLogicalVolume, reconcile.Request]{ + CreateFunc: func(_ context.Context, e event.TypedCreateEvent[*v1alpha1.LVMLogicalVolume], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] got a create event for the LVMLogicalVolume: %s", e.Object.GetName())) + request := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: e.Object.GetNamespace(), Name: e.Object.GetName()}} + q.Add(request) + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] added the request of the LVMLogicalVolume %s to Reconciler", e.Object.GetName())) + }, + + UpdateFunc: func(_ context.Context, e event.TypedUpdateEvent[*v1alpha1.LVMLogicalVolume], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] got an update event for the LVMLogicalVolume: %s", e.ObjectNew.GetName())) + // TODO: Figure out how to log it in our logger. + if cfg.Loglevel == "4" { + fmt.Println("==============START DIFF==================") + fmt.Println(cmp.Diff(e.ObjectOld, e.ObjectNew)) + fmt.Println("==============END DIFF==================") + } + + if reflect.DeepEqual(e.ObjectOld.Spec, e.ObjectNew.Spec) && e.ObjectNew.DeletionTimestamp == nil { + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] no target changes were made for the LVMLogicalVolume %s. 
No need to reconcile the request", e.ObjectNew.Name)) + return + } + + request := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: e.ObjectNew.Namespace, Name: e.ObjectNew.Name}} + q.Add(request) + log.Info(fmt.Sprintf("[RunLVMLogicalVolumeWatcherController] added the request of the LVMLogicalVolume %s to Reconciler", e.ObjectNew.GetName())) + }, + })) + + if err != nil { + log.Error(err, "[RunLVMLogicalVolumeWatcherController] the controller is unable to watch") + return nil, err + } + + return c, err +} + +func ReconcileLVMLogicalVolumeSnapshot( + ctx context.Context, + cl client.Client, + log logger.Logger, + metrics monitoring.Metrics, + sdsCache *cache.Cache, + llv *v1alpha1.LVMLogicalVolume, + lvg *v1alpha1.LVMVolumeGroup, +) (bool, error) { + log.Debug(fmt.Sprintf("[ReconcileLVMLogicalVolume] starts the reconciliation for the LVMLogicalVolume %s", llv.Name)) + + log.Debug(fmt.Sprintf("[ReconcileLVMLogicalVolume] tries to identify the reconciliation type for the LVMLogicalVolume %s", llv.Name)) + log.Trace(fmt.Sprintf("[ReconcileLVMLogicalVolume] %+v", llv)) + + switch identifyReconcileFunc(sdsCache, lvg.Spec.ActualVGNameOnTheNode, llv) { + case CreateReconcile: + return reconcileLLVCreateFunc(ctx, cl, log, metrics, sdsCache, llv, lvg) + case UpdateReconcile: + return reconcileLLVUpdateFunc(ctx, cl, log, metrics, sdsCache, llv, lvg) + case DeleteReconcile: + return reconcileLLVDeleteFunc(ctx, cl, log, metrics, sdsCache, llv, lvg) + default: + log.Info(fmt.Sprintf("[runEventReconcile] the LVMLogicalVolume %s has compeleted configuration and should not be reconciled", llv.Name)) + if llv.Status.Phase != LLVStatusPhaseCreated { + log.Warning(fmt.Sprintf("[runEventReconcile] the LVMLogicalVolume %s should not be reconciled but has an unexpected phase: %s. 
Setting the phase to %s", llv.Name, llv.Status.Phase, LLVStatusPhaseCreated)) + err := updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhaseCreated, "") + if err != nil { + return true, err + } + } + } + + return false, nil +} + +func reconcileLLVSCreateFunc( + ctx context.Context, + cl client.Client, + log logger.Logger, + metrics monitoring.Metrics, + sdsCache *cache.Cache, + llv *v1alpha1.LVMLogicalVolume, + lvg *v1alpha1.LVMVolumeGroup, +) (bool, error) { + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] starts reconciliation for the LVMLogicalVolume %s", llv.Name)) + + // this check prevents infinite resource updating after retries + if llv.Status == nil { + err := updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhasePending, "") + if err != nil { + log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to update the LVMLogicalVolume %s", llv.Name)) + return true, err + } + } + llvRequestSize, err := getLLVRequestedSize(llv, lvg) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to get LVMLogicalVolume %s requested size", llv.Name)) + return false, err + } + + freeSpace := getFreeLVGSpaceForLLV(lvg, llv) + log.Trace(fmt.Sprintf("[reconcileLLVCreateFunc] the LVMLogicalVolume %s, LV: %s, VG: %s type: %s requested size: %s, free space: %s", llv.Name, llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, llv.Spec.Type, llvRequestSize.String(), freeSpace.String())) + + if !utils.AreSizesEqualWithinDelta(llvRequestSize, freeSpace, internal.ResizeDelta) { + if freeSpace.Value() < llvRequestSize.Value()+internal.ResizeDelta.Value() { + err = errors.New("not enough space") + log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] the LV %s requested size %s of the LVMLogicalVolume %s is more than the actual free space %s", llv.Spec.ActualLVNameOnTheNode, llvRequestSize.String(), llv.Name, freeSpace.String())) + + // we return true cause the user might manage LVMVolumeGroup free space without changing the LLV + return true, err + } + } + + var cmd string + switch llv.Spec.Type { + case Thick: + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] LV %s will be created in VG %s with size: %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, llvRequestSize.String())) + cmd, err = utils.CreateThickLogicalVolume(lvg.Spec.ActualVGNameOnTheNode, llv.Spec.ActualLVNameOnTheNode, llvRequestSize.Value(), isContiguous(llv)) + case Thin: + if llv.Spec.Source == nil { + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] LV %s of the LVMLogicalVolume %s will be created in Thin-pool %s with size %s", llv.Spec.ActualLVNameOnTheNode, llv.Name, llv.Spec.Thin.PoolName, llvRequestSize.String())) + cmd, err = utils.CreateThinLogicalVolume(lvg.Spec.ActualVGNameOnTheNode, llv.Spec.Thin.PoolName, llv.Spec.ActualLVNameOnTheNode, llvRequestSize.Value()) + } else { + // volume is a clone + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] Snapshot (for source %s) LV %s of the LVMLogicalVolume %s will be created in Thin-pool %s with size %s", llv.Spec.Source.Name, llv.Spec.ActualLVNameOnTheNode, llv.Name, llv.Spec.Thin.PoolName, llvRequestSize.String())) + + var sourceVgName, sourceVolumeName string + if llv.Spec.Source.Kind == "LVMLogicalVolume" { + sourceLlv := &v1alpha1.LVMLogicalVolume{} + if err = cl.Get(ctx, types.NamespacedName{Name: llv.Spec.Source.Name}, sourceLlv); err != nil { + log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to find source LVMLogicalVolume %s", llv.Spec.Source.Name)) + return false, 
err + } + + sourceVolumeName = sourceLlv.Spec.ActualLVNameOnTheNode + sourceVgName = sourceLlv.Spec.LVMVolumeGroupName + + // TODO snapshots: validate source llv + } else if llv.Spec.Source.Kind == "LVMLogicalVolumeSnapshot" { + sourceSnapshot := &v1alpha1.LVMLogicalVolumeSnapshot{} + if err = cl.Get(ctx, types.NamespacedName{Name: llv.Spec.Source.Name}, sourceSnapshot); err != nil { + log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to find source LVMLogicalVolumeSnapshot %s", llv.Spec.Source.Name)) + return false, err + } + sourceVolumeName = sourceSnapshot.Spec.ActualLVNameOnTheNode + sourceVgName = sourceSnapshot.Spec.LVMVolumeGroupName + // TODO snapshots: validate source snapshot + } else { + return false, fmt.Errorf("source kind is not supported: %s", llv.Spec.Source.Kind) + } + cmd, err = utils.CreateThinLogicalVolumeSnapshot(llv.Spec.ActualLVNameOnTheNode, sourceVgName, sourceVolumeName) + + } + } + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] runs cmd: %s", cmd)) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to create a %s LogicalVolume for the LVMLogicalVolume %s", llv.Spec.Type, llv.Name)) + return true, err + } + + log.Info(fmt.Sprintf("[reconcileLLVCreateFunc] successfully created LV %s in VG %s for LVMLogicalVolume resource with name: %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, llv.Name)) + + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] adds the LV %s to the cache", llv.Spec.ActualLVNameOnTheNode)) + sdsCache.AddLV(lvg.Spec.ActualVGNameOnTheNode, llv.Spec.ActualLVNameOnTheNode) + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] tries to get the LV %s actual size", llv.Spec.ActualLVNameOnTheNode)) + actualSize := getLVActualSize(sdsCache, lvg.Spec.ActualVGNameOnTheNode, llv.Spec.ActualLVNameOnTheNode) + if actualSize.Value() == 0 { + log.Warning(fmt.Sprintf("[reconcileLLVCreateFunc] unable to get actual size for LV %s in VG %s (likely LV was not found in the cache), retry...", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode)) + return true, nil + } + log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] successfully got the LV %s actual size", llv.Spec.ActualLVNameOnTheNode)) + log.Trace(fmt.Sprintf("[reconcileLLVCreateFunc] the LV %s in VG: %s has actual size: %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, actualSize.String())) + + updated, err := updateLLVPhaseToCreatedIfNeeded(ctx, cl, llv, actualSize) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to update the LVMLogicalVolume %s", llv.Name)) + return true, err + } + + if updated { + log.Info(fmt.Sprintf("[reconcileLLVCreateFunc] successfully updated the LVMLogicalVolume %s status phase to Created", llv.Name)) + } else { + log.Warning(fmt.Sprintf("[reconcileLLVCreateFunc] LVMLogicalVolume %s status phase was not updated to Created due to the resource has already have the same phase", llv.Name)) + } + + log.Info(fmt.Sprintf("[reconcileLLVCreateFunc] successfully ended the reconciliation for the LVMLogicalVolume %s", llv.Name)) + return false, nil +} + +func reconcileLLVSUpdateFunc( + ctx context.Context, + cl client.Client, + log logger.Logger, + metrics monitoring.Metrics, + sdsCache *cache.Cache, + llv *v1alpha1.LVMLogicalVolume, + lvg *v1alpha1.LVMVolumeGroup, +) (bool, error) { + log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] starts reconciliation for the LVMLogicalVolume %s", llv.Name)) + + // status might be nil if a user creates the resource with LV name which matches existing 
+
+func reconcileLLVSUpdateFunc(
+    ctx context.Context,
+    cl client.Client,
+    log logger.Logger,
+    metrics monitoring.Metrics,
+    sdsCache *cache.Cache,
+    llv *v1alpha1.LVMLogicalVolume,
+    lvg *v1alpha1.LVMVolumeGroup,
+) (bool, error) {
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] starts reconciliation for the LVMLogicalVolume %s", llv.Name))
+
+    // status might be nil if a user creates the resource with an LV name which matches an existing LV on the node
+    if llv.Status == nil {
+        err := updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhasePending, "")
+        if err != nil {
+            log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] unable to update the LVMLogicalVolume %s", llv.Name))
+            return true, err
+        }
+    }
+
+    // the current LV size has to be taken from the node, as the status might be nil
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] tries to get LVMLogicalVolume %s actual size before the extension", llv.Name))
+    actualSize := getLVActualSize(sdsCache, lvg.Spec.ActualVGNameOnTheNode, llv.Spec.ActualLVNameOnTheNode)
+    if actualSize.Value() == 0 {
+        log.Warning(fmt.Sprintf("[reconcileLLVUpdateFunc] LV %s of the LVMLogicalVolume %s has zero size (likely LV was not updated in the cache)", llv.Spec.ActualLVNameOnTheNode, llv.Name))
+        return true, nil
+    }
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully got LVMLogicalVolume %s actual size %s before the extension", llv.Name, actualSize.String()))
+
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] tries to count the LVMLogicalVolume %s requested size", llv.Name))
+    llvRequestSize, err := getLLVRequestedSize(llv, lvg)
+    if err != nil {
+        log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] unable to get LVMLogicalVolume %s requested size", llv.Name))
+        return false, err
+    }
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully counted the LVMLogicalVolume %s requested size: %s", llv.Name, llvRequestSize.String()))
+
+    if utils.AreSizesEqualWithinDelta(actualSize, llvRequestSize, internal.ResizeDelta) {
+        log.Warning(fmt.Sprintf("[reconcileLLVUpdateFunc] the LV %s in VG %s has the same actual size %s as the requested size %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, actualSize.String(), llvRequestSize.String()))
+
+        updated, err := updateLLVPhaseToCreatedIfNeeded(ctx, cl, llv, actualSize)
+        if err != nil {
+            log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] unable to update the LVMLogicalVolume %s", llv.Name))
+            return true, err
+        }
+
+        if updated {
+            log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully updated the LVMLogicalVolume %s status phase to Created", llv.Name))
+        } else {
+            log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] no need to update the LVMLogicalVolume %s status phase to Created", llv.Name))
+        }
+
+        log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully ended reconciliation for the LVMLogicalVolume %s", llv.Name))
+
+        return false, nil
+    }
+
+    extendingSize := subtractQuantity(llvRequestSize, actualSize)
+    log.Trace(fmt.Sprintf("[reconcileLLVUpdateFunc] the LV %s in VG %s has extending size %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, extendingSize.String()))
+    if extendingSize.Value() < 0 {
+        err = fmt.Errorf("specified LV size %dB is less than actual one on the node %dB", llvRequestSize.Value(), actualSize.Value())
+        log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] unable to extend the LVMLogicalVolume %s", llv.Name))
+        return false, err
+    }
+
+    log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] the LVMLogicalVolume %s should be resized", llv.Name))
+    // this check prevents infinite resource updates after a retry
+    if llv.Status.Phase != Failed {
+        err := updateLVMLogicalVolumePhaseIfNeeded(ctx, cl, log, metrics, llv, LLVStatusPhaseResizing, "")
+        if err != nil {
+            log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] unable to update the LVMLogicalVolume %s", llv.Name))
+            return true, err
+        }
+    }
+
+    freeSpace := getFreeLVGSpaceForLLV(lvg, llv)
+    log.Trace(fmt.Sprintf("[reconcileLLVUpdateFunc] the LVMLogicalVolume %s, LV: %s, VG: %s, type: %s, extending size: %s, free space: %s", llv.Name, llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, llv.Spec.Type, extendingSize.String(), freeSpace.String()))
+
+    if !utils.AreSizesEqualWithinDelta(freeSpace, extendingSize, internal.ResizeDelta) {
+        if freeSpace.Value() < extendingSize.Value()+internal.ResizeDelta.Value() {
+            err = errors.New("not enough space")
+            log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] the LV %s requested size %s of the LVMLogicalVolume %s is more than the actual free space %s", llv.Spec.ActualLVNameOnTheNode, llvRequestSize.String(), llv.Name, freeSpace.String()))
+
+            // returns true because a user might manage the LVG free space without changing the LLV
+            return true, err
+        }
+    }
+
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] LV %s of the LVMLogicalVolume %s will be extended with size: %s", llv.Spec.ActualLVNameOnTheNode, llv.Name, llvRequestSize.String()))
+    cmd, err := utils.ExtendLV(llvRequestSize.Value(), lvg.Spec.ActualVGNameOnTheNode, llv.Spec.ActualLVNameOnTheNode)
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] runs cmd: %s", cmd))
+    if err != nil {
+        log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] unable to ExtendLV, name: %s, type: %s", llv.Spec.ActualLVNameOnTheNode, llv.Spec.Type))
+        return true, err
+    }
+
+    log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully extended LV %s in VG %s for LVMLogicalVolume resource with name: %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, llv.Name))
+
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] tries to get LVMLogicalVolume %s actual size after the extension", llv.Name))
+    newActualSize := getLVActualSize(sdsCache, lvg.Spec.ActualVGNameOnTheNode, llv.Spec.ActualLVNameOnTheNode)
+
+    // this case might be triggered if the sds cache has not updated the LV state in time
+    if newActualSize.Value() == actualSize.Value() {
+        log.Warning(fmt.Sprintf("[reconcileLLVUpdateFunc] LV %s of the LVMLogicalVolume %s was extended but the cache is not updated yet. It will be retried", llv.Spec.ActualLVNameOnTheNode, llv.Name))
+        return true, nil
+    }
+
+    log.Debug(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully got LVMLogicalVolume %s actual size after the extension", llv.Name))
+    log.Trace(fmt.Sprintf("[reconcileLLVUpdateFunc] the LV %s in VG %s actual size %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, newActualSize.String()))
+
+    // needed here as a user might create the LLV with an already existing LV
+    updated, err := updateLLVPhaseToCreatedIfNeeded(ctx, cl, llv, newActualSize)
+    if err != nil {
+        log.Error(err, fmt.Sprintf("[reconcileLLVUpdateFunc] unable to update the LVMLogicalVolume %s", llv.Name))
+        return true, err
+    }
+
+    if updated {
+        log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully updated the LVMLogicalVolume %s status phase to Created", llv.Name))
+    } else {
+        log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] no need to update the LVMLogicalVolume %s status phase to Created", llv.Name))
+    }
+
+    log.Info(fmt.Sprintf("[reconcileLLVUpdateFunc] successfully ended reconciliation for the LVMLogicalVolume %s", llv.Name))
+    return false, nil
+}
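Both the no-op check and the free-space check above lean on utils.AreSizesEqualWithinDelta together with internal.ResizeDelta. The helper itself is not shown in this diff; a rough sketch of the assumed semantics (two sizes count as equal when they differ by less than the delta), using k8s.io/apimachinery/pkg/api/resource:

// Assumed semantics, not the actual implementation: sizes are treated as equal
// when their absolute difference is smaller than the allowed delta.
func AreSizesEqualWithinDelta(left, right, delta resource.Quantity) bool {
	diff := left.Value() - right.Value()
	if diff < 0 {
		diff = -diff
	}
	return diff < delta.Value()
}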
+
+func reconcileLLVSDeleteFunc(
+    ctx context.Context,
+    cl client.Client,
+    log logger.Logger,
+    metrics monitoring.Metrics,
+    sdsCache *cache.Cache,
+    llv *v1alpha1.LVMLogicalVolume,
+    lvg *v1alpha1.LVMVolumeGroup,
+) (bool, error) {
+    log.Debug(fmt.Sprintf("[reconcileLLVDeleteFunc] starts reconciliation for the LVMLogicalVolume %s", llv.Name))
+
+    // The controller won't remove the LLV resource and the LV volume while the resource still has any other finalizers.
+    if len(llv.Finalizers) != 0 {
+        if len(llv.Finalizers) > 1 ||
+            llv.Finalizers[0] != internal.SdsNodeConfiguratorFinalizer {
+            log.Debug(fmt.Sprintf("[reconcileLLVDeleteFunc] unable to delete LVMLogicalVolume %s for now because it has other finalizers", llv.Name))
+            return false, nil
+        }
+    }
+
+    err := deleteLVIfNeeded(log, sdsCache, lvg.Spec.ActualVGNameOnTheNode, llv)
+    if err != nil {
+        log.Error(err, fmt.Sprintf("[reconcileLLVDeleteFunc] unable to delete the LV %s in VG %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode))
+        return true, err
+    }
+
+    log.Info(fmt.Sprintf("[reconcileLLVDeleteFunc] successfully deleted the LV %s in VG %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode))
+
+    err = removeLLVFinalizersIfExist(ctx, cl, metrics, log, llv)
+    if err != nil {
+        log.Error(err, fmt.Sprintf("[reconcileLLVDeleteFunc] unable to remove finalizers from the LVMLogicalVolume %s", llv.Name))
+        return true, err
+    }
+
+    log.Info(fmt.Sprintf("[reconcileLLVDeleteFunc] successfully ended reconciliation for the LVMLogicalVolume %s", llv.Name))
+    return false, nil
+}
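removeLLVFinalizersIfExist is not shown in this diff. A plausible minimal sketch of what it is expected to do, assuming it only strips the module's own finalizer and pushes the update; the name and error handling below are assumptions, not the actual implementation:

// removeSdsFinalizerIfExists (hypothetical name) drops the module finalizer
// from the resource, keeps everything else, and persists the change.
func removeSdsFinalizerIfExists(ctx context.Context, cl client.Client, llv *v1alpha1.LVMLogicalVolume) error {
	kept := make([]string, 0, len(llv.Finalizers))
	removed := false
	for _, f := range llv.Finalizers {
		if f == internal.SdsNodeConfiguratorFinalizer {
			removed = true
			continue
		}
		kept = append(kept, f)
	}
	if !removed {
		return nil
	}
	llv.Finalizers = kept
	return cl.Update(ctx, llv)
}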
diff --git a/images/agent/src/pkg/controller/lvm_logical_volume_watcher.go b/images/agent/src/pkg/controller/lvm_logical_volume_watcher.go
index afc78e47..56cf530e 100644
--- a/images/agent/src/pkg/controller/lvm_logical_volume_watcher.go
+++ b/images/agent/src/pkg/controller/lvm_logical_volume_watcher.go
@@ -31,10 +31,6 @@ const (
     Thick = "Thick"
     Thin  = "Thin"
 
-    CreateReconcile reconcileType = "Create"
-    UpdateReconcile reconcileType = "Update"
-    DeleteReconcile reconcileType = "Delete"
-
     lvmLogicalVolumeWatcherCtrlName = "lvm-logical-volume-watcher-controller"
 
     LLVStatusPhaseCreated = "Created"
@@ -43,10 +39,6 @@ const (
     LLVStatusPhaseFailed = "Failed"
 )
 
-type (
-    reconcileType string
-)
-
 func RunLVMLogicalVolumeWatcherController(
     mgr manager.Manager,
     cfg config.Options,
@@ -263,17 +255,39 @@ func reconcileLLVCreateFunc(
         log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] LV %s will be created in VG %s with size: %s", llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, llvRequestSize.String()))
         cmd, err = utils.CreateThickLogicalVolume(lvg.Spec.ActualVGNameOnTheNode, llv.Spec.ActualLVNameOnTheNode, llvRequestSize.Value(), isContiguous(llv))
     case Thin:
-        if llv.Spec.Source == "" {
-            log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] LV %s of the LVMLogicalVolume %s will be create in Thin-pool %s with size %s", llv.Spec.ActualLVNameOnTheNode, llv.Name, llv.Spec.Thin.PoolName, llvRequestSize.String()))
+        if llv.Spec.Source == nil {
+            log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] LV %s of the LVMLogicalVolume %s will be created in Thin-pool %s with size %s", llv.Spec.ActualLVNameOnTheNode, llv.Name, llv.Spec.Thin.PoolName, llvRequestSize.String()))
             cmd, err = utils.CreateThinLogicalVolume(lvg.Spec.ActualVGNameOnTheNode, llv.Spec.Thin.PoolName, llv.Spec.ActualLVNameOnTheNode, llvRequestSize.Value())
         } else {
-            log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] Snapshot (for source %s) LV %s of the LVMLogicalVolume %s will be create in Thin-pool %s with size %s", llv.Spec.Source, llv.Spec.ActualLVNameOnTheNode, llv.Name, llv.Spec.Thin.PoolName, llvRequestSize.String()))
-            sourceLlv := &v1alpha1.LVMLogicalVolume{}
-            if err = cl.Get(ctx, types.NamespacedName{Namespace: llv.Namespace, Name: llv.Spec.Source}, sourceLlv); err != nil {
-                log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to find source LogicalVolume %s (%s)", llv.Spec.Source, llv.Namespace))
+            // volume is a clone
+            log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] Snapshot (for source %s) LV %s of the LVMLogicalVolume %s will be created in Thin-pool %s with size %s", llv.Spec.Source.Name, llv.Spec.ActualLVNameOnTheNode, llv.Name, llv.Spec.Thin.PoolName, llvRequestSize.String()))
+
+            var sourceVgName, sourceVolumeName string
+            if llv.Spec.Source.Kind == "LVMLogicalVolume" {
+                sourceLlv := &v1alpha1.LVMLogicalVolume{}
+                if err = cl.Get(ctx, types.NamespacedName{Name: llv.Spec.Source.Name}, sourceLlv); err != nil {
+                    log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to find source LVMLogicalVolume %s", llv.Spec.Source.Name))
+                    return false, err
+                }
+
+                sourceVolumeName = sourceLlv.Spec.ActualLVNameOnTheNode
+                sourceVgName = sourceLlv.Spec.LVMVolumeGroupName
+
+                // TODO snapshots: validate source llv
+            } else if llv.Spec.Source.Kind == "LVMLogicalVolumeSnapshot" {
+                sourceSnapshot := &v1alpha1.LVMLogicalVolumeSnapshot{}
+                if err = cl.Get(ctx, types.NamespacedName{Name: llv.Spec.Source.Name}, sourceSnapshot); err != nil {
+                    log.Error(err, fmt.Sprintf("[reconcileLLVCreateFunc] unable to find source LVMLogicalVolumeSnapshot %s", llv.Spec.Source.Name))
+                    return false, err
+                }
+                sourceVolumeName = sourceSnapshot.Spec.ActualLVNameOnTheNode
+                sourceVgName = sourceSnapshot.Spec.LVMVolumeGroupName
+                // TODO snapshots: validate source snapshot
             } else {
-                cmd, err = utils.CreateThinLogicalVolumeSnapshot(llv.Spec.ActualLVNameOnTheNode, lvg.Spec.ActualVGNameOnTheNode, sourceLlv)
+                return false, fmt.Errorf("source kind is not supported: %s", llv.Spec.Source.Kind)
             }
+            cmd, err = utils.CreateThinLogicalVolumeSnapshot(llv.Spec.ActualLVNameOnTheNode, sourceVgName, sourceVolumeName)
+
         }
     }
     log.Debug(fmt.Sprintf("[reconcileLLVCreateFunc] runs cmd: %s", cmd))
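With the reworked signature (see the commands.go hunk further below), the snapshot helper takes the source VG and LV names directly instead of a source LVMLogicalVolume object. A small usage sketch with illustrative values; note that the call actually invokes lvcreate through nsenter, so it only makes sense on a node:

package main

import (
	"fmt"

	"agent/pkg/utils"
)

func main() {
	// Illustrative values: snapshot vg-data/pvc-source-1 into a new thin LV
	// named pvc-clone-1. Under the hood this builds roughly:
	//   lvcreate -s -kn -n pvc-clone-1 vg-data/pvc-source-1 -y
	cmd, err := utils.CreateThinLogicalVolumeSnapshot("pvc-clone-1", "vg-data", "pvc-source-1")
	fmt.Println(cmd, err)
}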
diff --git a/images/agent/src/pkg/scanner/scanner.go b/images/agent/src/pkg/scanner/scanner.go
index a44bec86..f77bc572 100644
--- a/images/agent/src/pkg/scanner/scanner.go
+++ b/images/agent/src/pkg/scanner/scanner.go
@@ -16,12 +16,20 @@ import (
     "agent/internal"
     "agent/pkg/cache"
     "agent/pkg/controller"
+    "agent/pkg/controller/bd"
     "agent/pkg/logger"
     "agent/pkg/throttler"
     "agent/pkg/utils"
 )
 
-func RunScanner(ctx context.Context, log logger.Logger, cfg config.Options, sdsCache *cache.Cache, bdCtrl, lvgDiscoverCtrl kubeCtrl.Controller) error {
+func RunScanner(
+    ctx context.Context,
+    log logger.Logger,
+    cfg config.Options,
+    sdsCache *cache.Cache,
+    bdCtrl func(context.Context) (controller.Result, error),
+    lvgDiscoverCtrl kubeCtrl.Controller,
+) error {
     log.Info("[RunScanner] starts the work")
 
     t := throttler.New(cfg.ThrottleIntervalSec)
@@ -114,11 +122,16 @@ func RunScanner(ctx context.Context, log logger.Logger, cfg config.Options, sdsC
     }
 }
 
-func runControllersReconcile(ctx context.Context, log logger.Logger, bdCtrl, lvgDiscoverCtrl kubeCtrl.Controller) error {
-    log.Info(fmt.Sprintf("[runControllersReconcile] run %s reconcile", controller.BlockDeviceCtrlName))
-    bdRes, err := bdCtrl.Reconcile(ctx, reconcile.Request{})
+func runControllersReconcile(
+    ctx context.Context,
+    log logger.Logger,
+    bdCtrl func(context.Context) (controller.Result, error),
+    lvgDiscoverCtrl kubeCtrl.Controller,
+) error {
+    log.Info(fmt.Sprintf("[runControllersReconcile] run %s reconcile", bd.Name))
+    bdRes, err := bdCtrl(ctx)
     if err != nil {
-        log.Error(err, fmt.Sprintf("[runControllersReconcile] an error occurred while %s reconcile", controller.BlockDeviceCtrlName))
+        log.Error(err, fmt.Sprintf("[runControllersReconcile] an error occurred while %s reconcile", bd.Name))
         return err
     }
@@ -127,14 +140,14 @@ func runControllersReconcile(ctx context.Context, log logger.Logger, bdCtrl, lvg
         for bdRes.RequeueAfter > 0 {
             log.Warning(fmt.Sprintf("[runControllersReconcile] BlockDevices reconcile needs a retry in %s", bdRes.RequeueAfter.String()))
             time.Sleep(bdRes.RequeueAfter)
-            bdRes, err = bdCtrl.Reconcile(ctx, reconcile.Request{})
+            bdRes, err = bdCtrl(ctx)
         }
         log.Info("[runControllersReconcile] successfully reconciled BlockDevices after a retry")
         }()
     }
 
-    log.Info(fmt.Sprintf("[runControllersReconcile] run %s successfully reconciled", controller.BlockDeviceCtrlName))
+    log.Info(fmt.Sprintf("[runControllersReconcile] run %s successfully reconciled", bd.Name))
 
     log.Info(fmt.Sprintf("[runControllersReconcile] run %s reconcile", controller.LVMVolumeGroupDiscoverCtrlName))
     lvgRes, err := lvgDiscoverCtrl.Reconcile(ctx, reconcile.Request{})
diff --git a/images/agent/src/pkg/test_utils/fake_client.go b/images/agent/src/pkg/test_utils/fake_client.go
new file mode 100644
index 00000000..d66aefb6
--- /dev/null
+++ b/images/agent/src/pkg/test_utils/fake_client.go
@@ -0,0 +1,20 @@
+package test_utils
+
+import (
+    "github.com/deckhouse/sds-node-configurator/api/v1alpha1"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/client-go/kubernetes/scheme"
+    "sigs.k8s.io/controller-runtime/pkg/client"
+    "sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+func NewFakeClient() client.WithWatch {
+    s := scheme.Scheme
+    _ = metav1.AddMetaToScheme(s)
+    _ = v1alpha1.AddToScheme(s)
+
+    builder := fake.NewClientBuilder().WithScheme(s)
+
+    cl := builder.Build()
+    return cl
+}
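NewFakeClient makes it possible to exercise the reconcile helpers against an in-memory controller-runtime client. A hedged test sketch; the test name and resource fields are illustrative only:

package test_utils

import (
	"context"
	"testing"

	"github.com/deckhouse/sds-node-configurator/api/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// Illustrative round trip: create an LVMLogicalVolume with the new spec.source
// field and read it back through the fake client.
func TestFakeClientRoundTrip(t *testing.T) {
	cl := NewFakeClient()

	llv := &v1alpha1.LVMLogicalVolume{
		ObjectMeta: metav1.ObjectMeta{Name: "clone-from-snapshot"},
		Spec: v1alpha1.LVMLogicalVolumeSpec{
			Type:   "Thin",
			Source: &v1alpha1.LVMLogicalVolumeSource{Kind: "LVMLogicalVolumeSnapshot", Name: "src-snap"},
		},
	}
	if err := cl.Create(context.Background(), llv); err != nil {
		t.Fatal(err)
	}

	got := &v1alpha1.LVMLogicalVolume{}
	if err := cl.Get(context.Background(), types.NamespacedName{Name: "clone-from-snapshot"}, got); err != nil {
		t.Fatal(err)
	}
	if got.Spec.Source == nil || got.Spec.Source.Name != "src-snap" {
		t.Fatalf("unexpected source: %+v", got.Spec.Source)
	}
}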
"github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "agent/pkg/logger" + "agent/pkg/monitoring" ) func GetBlockDevices(ctx context.Context) ([]internal.Device, string, bytes.Buffer, error) { @@ -273,8 +275,8 @@ func CreateThinPoolFullVGSpace(thinPoolName, vgName string) (string, error) { return cmd.String(), nil } -func CreateThinLogicalVolumeSnapshot(name string, vgName string, sourceLlv *v1alpha1.LVMLogicalVolume) (string, error) { - args := []string{"lvcreate", "-s", "-kn", "-n", name, fmt.Sprintf("%s/%s", vgName, sourceLlv.Name), "-y"} +func CreateThinLogicalVolumeSnapshot(name string, sourceVgName string, sourceName string) (string, error) { + args := []string{"lvcreate", "-s", "-kn", "-n", name, fmt.Sprintf("%s/%s", sourceVgName, sourceName), "-y"} extendedArgs := lvmStaticExtendedArgs(args) cmd := exec.Command(internal.NSENTERCmd, extendedArgs...) @@ -489,6 +491,103 @@ func UnmarshalDevices(out []byte) ([]internal.Device, error) { return devices.BlockDevices, nil } +func ReTag(ctx context.Context, log logger.Logger, metrics monitoring.Metrics, ctrlName string) error { + // thin pool + log.Debug("[ReTag] start re-tagging LV") + start := time.Now() + lvs, cmdStr, _, err := GetAllLVs(ctx) + metrics.UtilsCommandsDuration(ctrlName, "lvs").Observe(metrics.GetEstimatedTimeInSeconds(start)) + metrics.UtilsCommandsExecutionCount(ctrlName, "lvs").Inc() + log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) + if err != nil { + metrics.UtilsCommandsErrorsCount(ctrlName, "lvs").Inc() + log.Error(err, "[ReTag] unable to GetAllLVs") + return err + } + + for _, lv := range lvs { + tags := strings.Split(lv.LvTags, ",") + for _, tag := range tags { + if strings.Contains(tag, internal.LVMTags[0]) { + continue + } + + if strings.Contains(tag, internal.LVMTags[1]) { + start = time.Now() + cmdStr, err = LVChangeDelTag(lv, tag) + metrics.UtilsCommandsDuration(ctrlName, "lvchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) + metrics.UtilsCommandsExecutionCount(ctrlName, "lvchange").Inc() + log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) + if err != nil { + metrics.UtilsCommandsErrorsCount(ctrlName, "lvchange").Inc() + log.Error(err, "[ReTag] unable to LVChangeDelTag") + return err + } + + start = time.Now() + cmdStr, err = VGChangeAddTag(lv.VGName, internal.LVMTags[0]) + metrics.UtilsCommandsDuration(ctrlName, "vgchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) + metrics.UtilsCommandsExecutionCount(ctrlName, "vgchange").Inc() + log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) + if err != nil { + metrics.UtilsCommandsErrorsCount(ctrlName, "vgchange").Inc() + log.Error(err, "[ReTag] unable to VGChangeAddTag") + return err + } + } + } + } + log.Debug("[ReTag] end re-tagging LV") + + log.Debug("[ReTag] start re-tagging LVM") + start = time.Now() + vgs, cmdStr, _, err := GetAllVGs(ctx) + metrics.UtilsCommandsDuration(ctrlName, "vgs").Observe(metrics.GetEstimatedTimeInSeconds(start)) + metrics.UtilsCommandsExecutionCount(ctrlName, "vgs").Inc() + log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) + if err != nil { + metrics.UtilsCommandsErrorsCount(ctrlName, cmdStr).Inc() + log.Error(err, "[ReTag] unable to GetAllVGs") + return err + } + + for _, vg := range vgs { + tags := strings.Split(vg.VGTags, ",") + for _, tag := range tags { + if strings.Contains(tag, internal.LVMTags[0]) { + continue + } + + if strings.Contains(tag, internal.LVMTags[1]) { + start = time.Now() + cmdStr, err = VGChangeDelTag(vg.VGName, tag) + metrics.UtilsCommandsDuration(ctrlName, 
"vgchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) + metrics.UtilsCommandsExecutionCount(ctrlName, "vgchange").Inc() + log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) + if err != nil { + metrics.UtilsCommandsErrorsCount(ctrlName, "vgchange").Inc() + log.Error(err, "[ReTag] unable to VGChangeDelTag") + return err + } + + start = time.Now() + cmdStr, err = VGChangeAddTag(vg.VGName, internal.LVMTags[0]) + metrics.UtilsCommandsDuration(ctrlName, "vgchange").Observe(metrics.GetEstimatedTimeInSeconds(start)) + metrics.UtilsCommandsExecutionCount(ctrlName, "vgchange").Inc() + log.Debug(fmt.Sprintf("[ReTag] exec cmd: %s", cmdStr)) + if err != nil { + metrics.UtilsCommandsErrorsCount(ctrlName, "vgchange").Inc() + log.Error(err, "[ReTag] unable to VGChangeAddTag") + return err + } + } + } + } + log.Debug("[ReTag] stop re-tagging LVM") + + return nil +} + func unmarshalPVs(out []byte) ([]internal.PVData, error) { var pvR internal.PVReport