Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add role/rolebinding to let nimcache and nimservice instances to use "nonroot" scc #54

Merged
merged 4 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions api/apps/v1alpha1/nimcache_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,16 @@ func (n *NIMCache) GetGroupID() *int64 {
return n.Spec.GroupID
}

// GetTolerations returns tolerations configured for the NIMCache Job
func (n *NIMCache) GetTolerations() []corev1.Toleration {
return n.Spec.Tolerations
}

// GetNodeSelectors returns nodeselectors configured for the NIMCache Job
func (n *NIMCache) GetNodeSelectors() map[string]string {
return n.Spec.NodeSelectors
}

// +kubebuilder:object:root=true

// NIMCacheList contains a list of NIMCache
Expand Down
14 changes: 12 additions & 2 deletions api/apps/v1alpha1/nimservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)
Expand Down Expand Up @@ -187,7 +188,7 @@ func (n *NIMService) GetStandardEnv() []corev1.EnvVar {
// GetStandardAnnotations returns default annotations to apply to the NIMService instance
func (n *NIMService) GetStandardAnnotations() map[string]string {
standardAnnotations := map[string]string{
"openshift.io/scc": "anyuid",
"openshift.io/scc": "nonroot",
}
return standardAnnotations
}
Expand Down Expand Up @@ -633,7 +634,16 @@ func (n *NIMService) GetRoleParams() *rendertypes.RoleParams {
params.Name = n.GetName()
params.Namespace = n.GetNamespace()

// TODO: set rules
// Set rules to use SCC
params.Rules = []rbacv1.PolicyRule{
{
APIGroups: []string{"security.openshift.io"},
Resources: []string{"securitycontextconstraints"},
ResourceNames: []string{"nonroot"},
Verbs: []string{"use"},
},
}

return params
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,14 @@ spec:
- get
- list
- watch
- apiGroups:
- security.openshift.io
resourceNames:
- nonroot
resources:
- securitycontextconstraints
verbs:
- use
- apiGroups:
- security.openshift.io
resources:
Expand Down
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,14 @@ rules:
- patch
- update
- watch
- apiGroups:
- security.openshift.io
resourceNames:
- nonroot
resources:
- securitycontextconstraints
verbs:
- use
- apiGroups:
- storage.k8s.io
resources:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,14 @@ rules:
- get
- list
- watch
- apiGroups:
- security.openshift.io
resourceNames:
- nonroot
resources:
- securitycontextconstraints
verbs:
- use
- apiGroups:
- security.openshift.io
resources:
Expand Down
220 changes: 217 additions & 3 deletions internal/controller/nimcache_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"gopkg.in/yaml.v2"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
apiResource "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -60,6 +61,15 @@ const (

// AllProfiles represents all profiles in the NIM manifest
AllProfiles = "all"

// NIMCacheRole is the name of the role for all NIMCache instances in the namespace
NIMCacheRole = "nim-cache-role"

// NIMCacheRoleBinding is the name of the rolebinding for all NIMCache instances in the namespace
NIMCacheRoleBinding = "nim-cache-rolebinding"

// NIMCacheServiceAccount is the name of the serviceaccount for all NIMCache instances in the namespace
NIMCacheServiceAccount = "nim-cache-sa"
)

// NIMCacheReconciler reconciles a NIMCache object
Expand Down Expand Up @@ -87,6 +97,7 @@ func NewNIMCacheReconciler(client client.Client, scheme *runtime.Scheme, log log
// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nimcaches,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nimcaches/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nimcaches/finalizers,verbs=update
// +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=use,resourceNames=nonroot
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;delete
// +kubebuilder:rbac:groups="",resources=pods/log,verbs=get
Expand Down Expand Up @@ -226,6 +237,182 @@ func (r *NIMCacheReconciler) cleanupNIMCache(ctx context.Context, nimCache *apps
return nil
}

func (r *NIMCacheReconciler) reconcileRole(ctx context.Context, nimCache *appsv1alpha1.NIMCache) error {
logger := r.GetLogger()
roleName := NIMCacheRole
roleNamespacedName := types.NamespacedName{Name: roleName, Namespace: nimCache.GetNamespace()}

// Desired Role configuration
desiredRole := &rbacv1.Role{
ObjectMeta: metav1.ObjectMeta{
Name: roleName,
Namespace: nimCache.GetNamespace(),
Labels: map[string]string{
"app": "k8s-nim-operator",
},
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"security.openshift.io"},
Resources: []string{"securitycontextconstraints"},
ResourceNames: []string{"nonroot"},
Verbs: []string{"use"},
},
},
}

// Check if the Role already exists
existingRole := &rbacv1.Role{}
err := r.Get(ctx, roleNamespacedName, existingRole)
if err != nil && client.IgnoreNotFound(err) != nil {
logger.Error(err, "Failed to get Role", "Name", roleName)
return err
}

if err != nil {
// Role does not exist, create a new one
logger.Info("Creating a new Role", "Name", roleName)

err = r.Create(ctx, desiredRole)
if err != nil {
logger.Error(err, "Failed to create Role", "Name", roleName)
return err
}

logger.Info("Successfully created Role", "Name", roleName)
} else {
// Role exists, check if it needs to be updated
if !roleEqual(existingRole, desiredRole) {
logger.Info("Updating existing Role", "Name", roleName)
existingRole.Rules = desiredRole.Rules

err = r.Update(ctx, existingRole)
if err != nil {
logger.Error(err, "Failed to update Role", "Name", roleName)
return err
}

logger.Info("Successfully updated Role", "Name", roleName)
}
}

return nil
}

// Helper function to check if two Roles are equal
func roleEqual(existing, desired *rbacv1.Role) bool {
return utils.IsEqual(existing, desired, "Rules")
}

func (r *NIMCacheReconciler) reconcileRoleBinding(ctx context.Context, nimCache *appsv1alpha1.NIMCache) error {
logger := r.GetLogger()
rbName := NIMCacheRoleBinding
rbNamespacedName := types.NamespacedName{Name: rbName, Namespace: nimCache.GetNamespace()}

// Desired RoleBinding configuration
desiredRB := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: rbName,
Namespace: nimCache.GetNamespace(),
Labels: map[string]string{
"app": "k8s-nim-operator",
},
},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "Role",
Name: NIMCacheRole,
},
Subjects: []rbacv1.Subject{
{
Kind: "ServiceAccount",
Name: NIMCacheServiceAccount,
Namespace: nimCache.GetNamespace(),
},
},
}

// Check if the RoleBinding already exists
existingRB := &rbacv1.RoleBinding{}
err := r.Get(ctx, rbNamespacedName, existingRB)
if err != nil && client.IgnoreNotFound(err) != nil {
logger.Error(err, "Failed to get RoleBinding", "Name", rbName)
return err
}

if err != nil {
// RoleBinding does not exist, create a new one
logger.Info("Creating a new RoleBinding", "Name", rbName)

err = r.Create(ctx, desiredRB)
if err != nil {
logger.Error(err, "Failed to create RoleBinding", "Name", rbName)
return err
}

logger.Info("Successfully created RoleBinding", "Name", rbName)
} else {
// RoleBinding exists, check if it needs to be updated
if !roleBindingEqual(existingRB, desiredRB) {
logger.Info("Updating existing RoleBinding", "Name", rbName)
existingRB.RoleRef = desiredRB.RoleRef
existingRB.Subjects = desiredRB.Subjects

err = r.Update(ctx, existingRB)
if err != nil {
logger.Error(err, "Failed to update RoleBinding", "Name", rbName)
return err
}

logger.Info("Successfully updated RoleBinding", "Name", rbName)
}
}

return nil
}

// Helper function to check if two RoleBindings are equal
func roleBindingEqual(existing, desired *rbacv1.RoleBinding) bool {
return utils.IsEqual(existing, desired, "RoleRef", "Subjects")
}

func (r *NIMCacheReconciler) reconcileServiceAccount(ctx context.Context, nimCache *appsv1alpha1.NIMCache) error {
logger := r.GetLogger()
saName := NIMCacheServiceAccount
saNamespacedName := types.NamespacedName{Name: saName, Namespace: nimCache.GetNamespace()}

sa := &corev1.ServiceAccount{}
err := r.Get(ctx, saNamespacedName, sa)
if err != nil && client.IgnoreNotFound(err) != nil {
return err
}

// If ServiceAccount does not exist, create a new one
if err != nil {
logger.Info("Creating a new ServiceAccount", "Name", saName)

newSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: saName,
Namespace: nimCache.GetNamespace(),
Labels: map[string]string{"app": "k8s-nim-operator"},
},
}

// Create the ServiceAccount
err = r.Create(ctx, newSA)
if err != nil {
logger.Error(err, "Failed to create ServiceAccount", "Name", saName)
return err
}

logger.Info("Successfully created ServiceAccount", "Name", saName)
}

// If the ServiceAccount already exists, no action is needed
return nil
}

func (r *NIMCacheReconciler) reconcilePVC(ctx context.Context, nimCache *appsv1alpha1.NIMCache) error {
logger := r.GetLogger()
pvcName := getPvcName(nimCache, nimCache.Spec.Storage.PVC)
Expand Down Expand Up @@ -540,8 +727,29 @@ func (r *NIMCacheReconciler) createPod(ctx context.Context, pod *corev1.Pod) err
func (r *NIMCacheReconciler) reconcileNIMCache(ctx context.Context, nimCache *appsv1alpha1.NIMCache) (ctrl.Result, error) {
logger := r.GetLogger()

// Reconcile ServiceAccount
err := r.reconcileServiceAccount(ctx, nimCache)
if err != nil {
logger.Error(err, "reconciliation of serviceaccount failed")
return ctrl.Result{}, err
}

// Reconcile Role
err = r.reconcileRole(ctx, nimCache)
if err != nil {
logger.Error(err, "reconciliation of role failed")
return ctrl.Result{}, err
}

// Reconcile RoleBinding
err = r.reconcileRoleBinding(ctx, nimCache)
if err != nil {
logger.Error(err, "reconciliation of rolebinding failed")
return ctrl.Result{}, err
}

// Reconcile PVC
err := r.reconcilePVC(ctx, nimCache)
err = r.reconcilePVC(ctx, nimCache)
if err != nil {
logger.Error(err, "reconciliation of pvc failed", "pvc", getPvcName(nimCache, nimCache.Spec.Storage.PVC))
return ctrl.Result{}, err
Expand Down Expand Up @@ -597,7 +805,7 @@ func constructPodSpec(nimCache *appsv1alpha1.NIMCache) *corev1.Pod {
}

annotations := map[string]string{
"openshift.io/scc": "anyuid",
"openshift.io/scc": "nonroot",
}

pod := &corev1.Pod{
Expand Down Expand Up @@ -629,6 +837,9 @@ func constructPodSpec(nimCache *appsv1alpha1.NIMCache) *corev1.Pod {
FSGroup: nimCache.GetGroupID(),
RunAsNonRoot: ptr.To[bool](true),
},
ServiceAccountName: nimCache.GetName(),
Tolerations: nimCache.GetTolerations(),
NodeSelector: nimCache.GetNodeSelectors(),
},
}

Expand Down Expand Up @@ -710,7 +921,10 @@ func constructJob(nimCache *appsv1alpha1.NIMCache) (*batchv1.Job, error) {
},
},
},
ImagePullSecrets: []corev1.LocalObjectReference{},
ImagePullSecrets: []corev1.LocalObjectReference{},
ServiceAccountName: nimCache.GetName(),
shivamerla marked this conversation as resolved.
Show resolved Hide resolved
Tolerations: nimCache.GetTolerations(),
NodeSelector: nimCache.GetNodeSelectors(),
},
},
BackoffLimit: ptr.To[int32](5), // retry max 5 times on failure
Expand Down
Loading