From 166ca59a85101d0fe17ec7cb0004342db1d5f95b Mon Sep 17 00:00:00 2001 From: Praveenrajmani Date: Wed, 23 Oct 2024 12:53:06 +0530 Subject: [PATCH] Add support for bulk-init based on ellipses --- cmd/directpv/bulk-init.go | 147 ++++++++++++++++ cmd/directpv/main.go | 1 + pkg/device/probe.go | 5 + pkg/initrequest/event.go | 20 +-- resources/init/ClusterRole.yaml | 28 +++ resources/init/ClusterRoleBinding.yaml | 19 ++ resources/init/Job.yaml | 71 ++++++++ resources/init/Namespace.yaml | 12 ++ resources/init/ServiceAccount.yaml | 10 ++ .../init/directpvdrives.directpv.min.io.yaml | 166 ++++++++++++++++++ resources/init/kustomization.yaml | 14 ++ 11 files changed, 483 insertions(+), 10 deletions(-) create mode 100644 cmd/directpv/bulk-init.go create mode 100644 resources/init/ClusterRole.yaml create mode 100644 resources/init/ClusterRoleBinding.yaml create mode 100644 resources/init/Job.yaml create mode 100644 resources/init/Namespace.yaml create mode 100644 resources/init/ServiceAccount.yaml create mode 100644 resources/init/directpvdrives.directpv.min.io.yaml create mode 100644 resources/init/kustomization.yaml diff --git a/cmd/directpv/bulk-init.go b/cmd/directpv/bulk-init.go new file mode 100644 index 000000000..70587028c --- /dev/null +++ b/cmd/directpv/bulk-init.go @@ -0,0 +1,147 @@ +// This file is part of MinIO DirectPV +// Copyright (c) 2024 MinIO, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package main + +import ( + "context" + "errors" + "fmt" + "os" + "strings" + "sync" + + directpvtypes "github.com/minio/directpv/pkg/apis/directpv.min.io/types" + "github.com/minio/directpv/pkg/consts" + devicepkg "github.com/minio/directpv/pkg/device" + "github.com/minio/directpv/pkg/ellipsis" + "github.com/minio/directpv/pkg/initrequest" + "github.com/minio/directpv/pkg/sys" + "github.com/minio/directpv/pkg/utils" + "github.com/spf13/cobra" + "k8s.io/klog/v2" +) + +var drivesArgs []string +var initAll bool + +var bulkInitCmd = &cobra.Command{ + Use: "bulk-init --drives [DRIVE-ELLIPSIS...]", + Short: "Bulk initialize the devices", + SilenceUsage: true, + SilenceErrors: true, + Hidden: true, + RunE: func(c *cobra.Command, _ []string) error { + if err := sys.Mkdir(consts.MountRootDir, 0o755); err != nil && !errors.Is(err, os.ErrExist) { + return err + } + switch len(drivesArgs) { + case 0: + return errors.New("--drives must be provided") + case 1: + if drivesArgs[0] == "*" { + initAll = true + } + } + + var drives []string + for i := range drivesArgs { + drivesArgs[i] = strings.TrimSpace(utils.TrimDevPrefix(drivesArgs[i])) + if drivesArgs[i] == "" { + return fmt.Errorf("empty drive name") + } + result, err := ellipsis.Expand(drivesArgs[i]) + if err != nil { + return err + } + drives = append(drives, result...) + } + if !initAll && len(drives) == 0 { + return errors.New("invalid ellipsis input; no drives selected") + } + return startBulkInit(c.Context(), drives) + }, +} + +func init() { + bulkInitCmd.PersistentFlags().BoolVar(&dryRunFlag, "dry-run", dryRunFlag, "No modify mode") + bulkInitCmd.PersistentFlags().StringSliceVarP(&drivesArgs, "drives", "d", drivesArgs, "drives to be initialized; supports ellipses pattern e.g. sd{a...z}") +} + +func startBulkInit(ctx context.Context, drives []string) error { + var cancel context.CancelFunc + ctx, cancel = context.WithCancel(ctx) + defer cancel() + + initRequestHandler, err := initrequest.NewHandler( + ctx, + nodeID, + map[string]string{ + string(directpvtypes.TopologyDriverIdentity): identity, + string(directpvtypes.TopologyDriverRack): rack, + string(directpvtypes.TopologyDriverZone): zone, + string(directpvtypes.TopologyDriverRegion): region, + string(directpvtypes.TopologyDriverNode): string(nodeID), + }, + ) + if err != nil { + return fmt.Errorf("unable to create initrequest handler; %v", err) + } + + devices, err := devicepkg.Probe() + if err != nil { + return fmt.Errorf("unable to probe devices; %v", err) + } + + var filteredDevices []devicepkg.Device + for _, device := range devices { + klog.Infoln(device.Name) + if !device.Available() { + continue + } + if initAll || utils.Contains(drives, device.Name) { + filteredDevices = append(filteredDevices, device) + } + } + + if len(filteredDevices) == 0 { + return errors.New("no available drives selected to initialize") + } + + var wg sync.WaitGroup + var failed bool + for i := range filteredDevices { + wg.Add(1) + go func(i int, device devicepkg.Device, force bool) { + defer wg.Done() + if dryRunFlag { + klog.Infof("\n[DRY-RUN] initializing device %v with force: %v", device.Name, force) + return + } + if err := initRequestHandler.InitDevice(device, force); err != nil { + failed = true + klog.ErrorS(err, "unable to init device %v", device.Name) + } + }(i, filteredDevices[i], filteredDevices[i].FSType() != "") + } + wg.Wait() + + if failed { + return errors.New("failed to initialize all the drives") + } + + return nil +} diff --git a/cmd/directpv/main.go b/cmd/directpv/main.go index b733ebe28..fd802b45f 100644 --- a/cmd/directpv/main.go +++ b/cmd/directpv/main.go @@ -129,6 +129,7 @@ func init() { mainCmd.AddCommand(legacyNodeServerCmd) mainCmd.AddCommand(nodeControllerCmd) mainCmd.AddCommand(repairCmd) + mainCmd.AddCommand(bulkInitCmd) } func main() { diff --git a/pkg/device/probe.go b/pkg/device/probe.go index 16ac61c56..f6d6dd368 100644 --- a/pkg/device/probe.go +++ b/pkg/device/probe.go @@ -165,6 +165,11 @@ func (d Device) deniedReason() string { return reason } +// Available denotes if the device is available for initialization +func (d Device) Available() bool { + return d.deniedReason() == "" +} + // ToNodeDevice constructs the NodeDevice object from Device info. func (d Device) ToNodeDevice(nodeID directpvtypes.NodeID) types.Device { return types.Device{ diff --git a/pkg/initrequest/event.go b/pkg/initrequest/event.go index 4759ee490..cd6b77bde 100644 --- a/pkg/initrequest/event.go +++ b/pkg/initrequest/event.go @@ -46,7 +46,7 @@ const ( resyncPeriod = 5 * time.Minute ) -type initRequestEventHandler struct { +type InitRequestHandler struct { nodeID directpvtypes.NodeID reflink bool topology map[string]string @@ -64,7 +64,7 @@ type initRequestEventHandler struct { mu sync.Mutex } -func newInitRequestEventHandler(ctx context.Context, nodeID directpvtypes.NodeID, topology map[string]string) (*initRequestEventHandler, error) { +func NewHandler(ctx context.Context, nodeID directpvtypes.NodeID, topology map[string]string) (*InitRequestHandler, error) { reflink, err := reflinkSupported(ctx) if err != nil { return nil, err @@ -76,7 +76,7 @@ func newInitRequestEventHandler(ctx context.Context, nodeID directpvtypes.NodeID klog.V(3).Infof("XFS reflink support is disabled") } - return &initRequestEventHandler{ + return &InitRequestHandler{ reflink: reflink, nodeID: nodeID, topology: topology, @@ -129,7 +129,7 @@ func newInitRequestEventHandler(ctx context.Context, nodeID directpvtypes.NodeID }, nil } -func (handler *initRequestEventHandler) ListerWatcher() cache.ListerWatcher { +func (handler *InitRequestHandler) ListerWatcher() cache.ListerWatcher { labelSelector := fmt.Sprintf("%s=%s", directpvtypes.NodeLabelKey, handler.nodeID) return cache.NewFilteredListWatchFromClient( client.RESTClient(), @@ -141,11 +141,11 @@ func (handler *initRequestEventHandler) ListerWatcher() cache.ListerWatcher { ) } -func (handler *initRequestEventHandler) ObjectType() runtime.Object { +func (handler *InitRequestHandler) ObjectType() runtime.Object { return &types.InitRequest{} } -func (handler *initRequestEventHandler) Handle(ctx context.Context, eventType controller.EventType, object runtime.Object) error { +func (handler *InitRequestHandler) Handle(ctx context.Context, eventType controller.EventType, object runtime.Object) error { switch eventType { case controller.UpdateEvent, controller.AddEvent: initRequest := object.(*types.InitRequest) @@ -157,7 +157,7 @@ func (handler *initRequestEventHandler) Handle(ctx context.Context, eventType co return nil } -func (handler *initRequestEventHandler) initDevices(ctx context.Context, req *types.InitRequest) error { +func (handler *InitRequestHandler) initDevices(ctx context.Context, req *types.InitRequest) error { handler.mu.Lock() defer handler.mu.Unlock() @@ -196,7 +196,7 @@ func (handler *initRequestEventHandler) initDevices(ctx context.Context, req *ty wg.Add(1) go func(i int, device pkgdevice.Device, force bool) { defer wg.Done() - if err := handler.initDevice(device, force); err != nil { + if err := handler.InitDevice(device, force); err != nil { results[i].Error = err.Error() } }(i, device, req.Spec.Devices[i].Force) @@ -223,7 +223,7 @@ func updateInitRequest(ctx context.Context, name string, results []types.InitDev return retry.RetryOnConflict(retry.DefaultRetry, updateFunc) } -func (handler *initRequestEventHandler) initDevice(device pkgdevice.Device, force bool) error { +func (handler *InitRequestHandler) InitDevice(device pkgdevice.Device, force bool) error { devPath := utils.AddDevPrefix(device.Name) deviceMap, majorMinorMap, err := handler.getMounts() @@ -295,7 +295,7 @@ func (handler *initRequestEventHandler) initDevice(device pkgdevice.Device, forc // StartController starts initrequest controller. func StartController(ctx context.Context, nodeID directpvtypes.NodeID, identity, rack, zone, region string) { - initRequestHandler, err := newInitRequestEventHandler( + initRequestHandler, err := NewHandler( ctx, nodeID, map[string]string{ diff --git a/resources/init/ClusterRole.yaml b/resources/init/ClusterRole.yaml new file mode 100644 index 000000000..45fa9e273 --- /dev/null +++ b/resources/init/ClusterRole.yaml @@ -0,0 +1,28 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: + rbac.authorization.kubernetes.io/autoupdate: "true" + creationTimestamp: null + labels: + directpv.min.io/created-by: bulk-init + directpv.min.io/version: v1beta1 + name: directpv-bulk-init +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - list + - patch + - update + - watch +- apiGroups: + - directpv.min.io + resources: + - directpvdrives + verbs: + - create + - get diff --git a/resources/init/ClusterRoleBinding.yaml b/resources/init/ClusterRoleBinding.yaml new file mode 100644 index 000000000..4a9b317f9 --- /dev/null +++ b/resources/init/ClusterRoleBinding.yaml @@ -0,0 +1,19 @@ + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + annotations: + rbac.authorization.kubernetes.io/autoupdate: "true" + creationTimestamp: null + labels: + directpv.min.io/created-by: bulk-init + directpv.min.io/version: v1beta1 + name: directpv-bulk-init +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: directpv-bulk-init +subjects: +- kind: ServiceAccount + name: directpv-bulk-init + namespace: directpv diff --git a/resources/init/Job.yaml b/resources/init/Job.yaml new file mode 100644 index 000000000..59e2e1b44 --- /dev/null +++ b/resources/init/Job.yaml @@ -0,0 +1,71 @@ + +apiVersion: batch/v1 +kind: Job +metadata: + labels: + directpv.min.io/created-by: bulk-init + directpv.min.io/version: v1beta1 + name: bulk-init + namespace: directpv +spec: + ttlSecondsAfterFinished: 3600 + backoffLimit: 0 + template: + metadata: + name: bulk-init + namespace: directpv + spec: + containers: + - args: + - bulk-init + - -v=3 + - --kube-node-name=$(KUBE_NODE_NAME) + - --drives=$(DRIVE_ELLIPSES) + env: + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + image: quay.io/minio/directpv@sha256:83fd05fe114ed15c3975333c90cbe18c782d9c4d5c7ad6fdb8cc835e380ba505 + name: bulk-init + resources: {} + securityContext: + privileged: true + terminationMessagePath: /var/log/driver-termination-log + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/lib/directpv/ + mountPropagation: Bidirectional + name: directpv-common-root + - mountPath: /sys + mountPropagation: Bidirectional + name: sysfs + - mountPath: /dev + mountPropagation: HostToContainer + name: devfs + readOnly: true + - mountPath: /run/udev/data + mountPropagation: Bidirectional + name: run-udev-data-dir + readOnly: true + restartPolicy: Never + hostPID: true + serviceAccountName: directpv-bulk-init + volumes: + - hostPath: + path: /var/lib/directpv/ + type: DirectoryOrCreate + name: directpv-common-root + - hostPath: + path: /sys + type: DirectoryOrCreate + name: sysfs + - hostPath: + path: /dev + type: DirectoryOrCreate + name: devfs + - hostPath: + path: /run/udev/data + type: DirectoryOrCreate + name: run-udev-data-dir diff --git a/resources/init/Namespace.yaml b/resources/init/Namespace.yaml new file mode 100644 index 000000000..04253c153 --- /dev/null +++ b/resources/init/Namespace.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Namespace +metadata: + creationTimestamp: null + finalizers: + - foregroundDeletion + labels: + directpv.min.io/version: v1beta1 + pod-security.kubernetes.io/enforce: privileged + name: directpv +spec: {} +status: {} diff --git a/resources/init/ServiceAccount.yaml b/resources/init/ServiceAccount.yaml new file mode 100644 index 000000000..b871e7778 --- /dev/null +++ b/resources/init/ServiceAccount.yaml @@ -0,0 +1,10 @@ + +apiVersion: v1 +kind: ServiceAccount +metadata: + creationTimestamp: null + labels: + directpv.min.io/created-by: bulk-init + directpv.min.io/version: v1beta1 + name: directpv-bulk-init + namespace: directpv diff --git a/resources/init/directpvdrives.directpv.min.io.yaml b/resources/init/directpvdrives.directpv.min.io.yaml new file mode 100644 index 000000000..a242f4a36 --- /dev/null +++ b/resources/init/directpvdrives.directpv.min.io.yaml @@ -0,0 +1,166 @@ + +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + creationTimestamp: null + labels: + directpv.min.io/version: v1beta1 + name: directpvdrives.directpv.min.io +spec: + conversion: + strategy: None + group: directpv.min.io + names: + kind: DirectPVDrive + listKind: DirectPVDriveList + plural: directpvdrives + singular: directpvdrive + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: DirectPVDrive denotes drive CRD object. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: DriveSpec represents DirectPV drive specification values. + properties: + relabel: + type: boolean + unschedulable: + type: boolean + type: object + status: + description: DriveStatus denotes drive information. + properties: + allocatedCapacity: + format: int64 + type: integer + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + freeCapacity: + format: int64 + type: integer + fsuuid: + type: string + make: + type: string + status: + description: DriveStatus denotes drive status + type: string + topology: + additionalProperties: + type: string + type: object + totalCapacity: + format: int64 + type: integer + required: + - allocatedCapacity + - freeCapacity + - fsuuid + - status + - topology + - totalCapacity + type: object + required: + - metadata + - status + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: null + storedVersions: null diff --git a/resources/init/kustomization.yaml b/resources/init/kustomization.yaml new file mode 100644 index 000000000..6149f2be6 --- /dev/null +++ b/resources/init/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - Namespace.yaml + - directpvdrives.directpv.min.io.yaml + - ClusterRole.yaml + - ClusterRoleBinding.yaml + - ServiceAccount.yaml + - Job.yaml + +images: + - name: quay.io/minio/directpv + digest: sha256:83fd05fe114ed15c3975333c90cbe18c782d9c4d5c7ad6fdb8cc835e380ba505