
Merge pull request #1936 from rksharma95/snitch-conditional-mounts
fix(operator): snitch conditional mounts
rksharma95 authored Jan 13, 2025
2 parents 59ac302 + 95be886 commit 3e3ec18
Showing 3 changed files with 183 additions and 16 deletions.
28 changes: 28 additions & 0 deletions deployments/helm/KubeArmorOperator/templates/clusterrole-rbac.yaml
@@ -246,6 +246,34 @@ rules:
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ .Values.kubearmorOperator.name }}-manage-snitch-job
  namespace: {{ .Release.Namespace }}
rules:
# to handle snitch mounts dynamically
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - list
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - list
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - get
  - create
  - delete
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ .Values.kubearmorOperator.name }}-tls-secrets-role
  namespace: {{ .Release.Namespace }}
169 changes: 154 additions & 15 deletions pkg/KubeArmorOperator/internal/controller/cluster.go
Expand Up @@ -8,6 +8,7 @@ import (
"context"
"fmt"
"reflect"
"regexp"
"slices"
"sort"
"strconv"
@@ -103,6 +104,142 @@ func NewClusterWatcher(client *kubernetes.Clientset, log *zap.SugaredLogger, ext
}
}

func extractVolumeFromMessage(message string) (string, bool) {
// find volume name between quotes after "volume"
// Message: MountVolume.SetUp failed for volume \"notexists-path\"
re := regexp.MustCompile(`volume\s*\"([^\"]+)\"`)
matches := re.FindStringSubmatch(message)

if len(matches) > 1 {
return matches[1], true
}
return "", false
}

func extractPathFromMessage(message string) (string, bool) {
// find mount path between quotes after "mkdir"
// Message: failed to mkdir \"/etc/apparmor.d/\": mkdir /etc/apparmor.d/: read-only file system
re := regexp.MustCompile(`mkdir\s+\"([^\"]+)\"`)
matches := re.FindStringSubmatch(message)

if len(matches) > 1 {
return matches[1], true
}
return "", false
}
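
// Illustrative inputs and outputs for the two helpers above (hypothetical call sites;
// the messages are the ones quoted in the comments):
//
//	extractVolumeFromMessage(`MountVolume.SetUp failed for volume "notexists-path"`)
//	  => "notexists-path", true
//	extractPathFromMessage(`failed to mkdir "/etc/apparmor.d/": mkdir /etc/apparmor.d/: read-only file system`)
//	  => "/etc/apparmor.d/", true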

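// checkJobStatus polls a snitch Job for up to five minutes. If a pod event reports a
// failed hostPath mount (missing directory or read-only mkdir), the Job is deleted and
// recreated without the offending volume and volumeMount.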
func (clusterWatcher *ClusterWatcher) checkJobStatus(job, runtime, nodename string) {
defer func() {
clusterWatcher.Log.Infof("checkJobStatus completed for job: %s", job)
}()

// poll the job every few seconds and give up after five minutes
timeout := time.After(5 * time.Minute)
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for {
select {
case <-timeout:
clusterWatcher.Log.Infof("watcher exit after timeout for job: %s", job)
return
case <-ticker.C:
clusterWatcher.Log.Infof("watching status for job: %s", job)

j, err := clusterWatcher.Client.BatchV1().Jobs(common.Namespace).Get(context.TODO(), job, v1.GetOptions{})
if err != nil {
clusterWatcher.Log.Warnf("cannot get job: %s", job)
return
}

if j.Status.Succeeded > 0 {
return
}

podsList, err := clusterWatcher.Client.CoreV1().Pods(common.Namespace).List(context.TODO(), v1.ListOptions{
LabelSelector: fmt.Sprintf("job-name=%s", job),
})

if err != nil {
clusterWatcher.Log.Warnf("Cannot get job pod: %s", job)
return
}

for _, pod := range podsList.Items {
mountFailure := false
failedMount := ""
events, err := clusterWatcher.Client.CoreV1().Events(common.Namespace).List(context.TODO(), v1.ListOptions{
FieldSelector: fmt.Sprintf("involvedObject.name=%s", pod.Name),
})
if err != nil {
clusterWatcher.Log.Warnf("cannot get pod events for pod: %s", pod.Name)
return
}

for _, event := range events.Items {
if event.Type == "Warning" && (event.Reason == "FailedMount" ||
event.Reason == "FailedAttachVolume" ||
event.Reason == "VolumeMountsFailed") {
clusterWatcher.Log.Infof("Got Failed Event for job pod: %v", event.Message)
mountFailure = true
failedMount, _ = extractVolumeFromMessage(event.Message)
clusterWatcher.Log.Infof("FailedMount: %s", failedMount)
break
}

if event.Type == "Warning" && event.Reason == "Failed" && strings.Contains(event.Message, "mkdir") {
clusterWatcher.Log.Infof("Got Failed Event for job pod: %v", event.Message)
if path, readOnly := extractPathFromMessage(event.Message); readOnly {
failedMount = path
mountFailure = true
clusterWatcher.Log.Infof("ReadOnly FS: %s", failedMount)
break
}
}
}

if mountFailure {
propagatePodDeletion := v1.DeletePropagationBackground
err := clusterWatcher.Client.BatchV1().Jobs(common.Namespace).Delete(context.TODO(), job, v1.DeleteOptions{
PropagationPolicy: &propagatePodDeletion,
})
if err != nil {
clusterWatcher.Log.Warnf("Cannot delete job: %s, err=%s", job, err)
return
}

newJob := deploySnitch(nodename, runtime)

volumeToDelete := ""
for _, vol := range newJob.Spec.Template.Spec.Volumes {
if (vol.HostPath != nil && vol.HostPath.Path == failedMount) || vol.Name == failedMount {
volumeToDelete = vol.Name
break
}
}

newJob.Spec.Template.Spec.Volumes = slices.DeleteFunc(newJob.Spec.Template.Spec.Volumes, func(vol corev1.Volume) bool {
return vol.Name == volumeToDelete
})

newJob.Spec.Template.Spec.Containers[0].VolumeMounts = slices.DeleteFunc(newJob.Spec.Template.Spec.Containers[0].VolumeMounts, func(volMount corev1.VolumeMount) bool {
return volMount.Name == volumeToDelete
})

newJ, err := clusterWatcher.Client.BatchV1().Jobs(common.Namespace).Create(context.TODO(), newJob, v1.CreateOptions{})
if err != nil {
clusterWatcher.Log.Warnf("Cannot create job: %s, error=%s", newJob.Name, err)
return
}
job = newJ.Name
break
}
}
}
}
}

func (clusterWatcher *ClusterWatcher) WatchNodes() {
log := clusterWatcher.Log
nodeInformer := informer.Core().V1().Nodes().Informer()
@@ -113,12 +250,13 @@ func (clusterWatcher *ClusterWatcher) WatchNodes() {
runtime = strings.Split(runtime, ":")[0]
if val, ok := node.Labels[common.OsLabel]; ok && val == "linux" {
log.Infof("Installing snitch on node %s", node.Name)
_, err := clusterWatcher.Client.BatchV1().Jobs(common.Namespace).Create(context.Background(), deploySnitch(node.Name, runtime), v1.CreateOptions{})
snitchJob, err := clusterWatcher.Client.BatchV1().Jobs(common.Namespace).Create(context.Background(), deploySnitch(node.Name, runtime), v1.CreateOptions{})
if err != nil {
log.Errorf("Cannot run snitch on node %s, error=%s", node.Name, err.Error())
return
}
log.Infof("Snitch was installed on node %s", node.Name)
go clusterWatcher.checkJobStatus(snitchJob.Name, runtime, node.Name)
}
}
},
@@ -136,12 +274,13 @@ func (clusterWatcher *ClusterWatcher) WatchNodes() {
clusterWatcher.Log.Infof("Node might have been restarted, redeploying snitch ")
if val, ok := node.Labels[common.OsLabel]; ok && val == "linux" {
log.Infof("Installing snitch on node %s", node.Name)
_, err := clusterWatcher.Client.BatchV1().Jobs(common.Namespace).Create(context.Background(), deploySnitch(node.Name, runtime), v1.CreateOptions{})
snitchJob, err := clusterWatcher.Client.BatchV1().Jobs(common.Namespace).Create(context.Background(), deploySnitch(node.Name, runtime), v1.CreateOptions{})
if err != nil {
log.Errorf("Cannot run snitch on node %s, error=%s", node.Name, err.Error())
return
}
log.Infof("Snitch was installed on node %s", node.Name)
go clusterWatcher.checkJobStatus(snitchJob.Name, runtime, node.Name)
}
}
}
@@ -788,14 +927,14 @@ func (clusterWatcher *ClusterWatcher) UpdateCrdStatus(cfg, phase, message string
// retry the update
return false, nil
}
clusterWatcher.Log.Info("Config CR Status Updated Successfully")
}
return true, nil
})
if err != nil {
clusterWatcher.Log.Errorf("Error updating the ConfigCR status %s", err)
return
}
clusterWatcher.Log.Info("Config CR Status Updated Successfully")
}

func (clusterWatcher *ClusterWatcher) UpdateKubeArmorConfigMap(cfg *opv1.KubeArmorConfig) {
@@ -1002,19 +1141,19 @@ func (clusterWatcher *ClusterWatcher) WatchRecommendedPolicies() error {
var yamlBytes []byte
policies, err := recommend.CRDFs.ReadDir(".")
if err != nil {
clusterWatcher.Log.Warnf("error reading policies FS", err)
clusterWatcher.Log.Warnf("error reading policies FS %s", err)
return err
}
for _, policy := range policies {
csp := &secv1.KubeArmorClusterPolicy{}
if !policy.IsDir() {
yamlBytes, err = recommend.CRDFs.ReadFile(policy.Name())
if err != nil {
clusterWatcher.Log.Warnf("error reading csp", policy.Name())
clusterWatcher.Log.Warnf("error reading csp %s", policy.Name())
continue
}
if err := runtime.DecodeInto(scheme.Codecs.UniversalDeserializer(), yamlBytes, csp); err != nil {
clusterWatcher.Log.Warnf("error decoding csp", policy.Name())
clusterWatcher.Log.Warnf("error decoding csp %s", policy.Name())
continue
}
}
@@ -1024,31 +1163,31 @@ func (clusterWatcher *ClusterWatcher) WatchRecommendedPolicies() error {
clusterWatcher.Log.Infof("excluding csp ", csp.Name)
err = clusterWatcher.Secv1Client.SecurityV1().KubeArmorClusterPolicies().Delete(context.Background(), csp.GetName(), metav1.DeleteOptions{})
if err != nil && !metav1errors.IsNotFound(err) {
clusterWatcher.Log.Warnf("error deleting csp", csp.GetName())
clusterWatcher.Log.Warnf("error deleting csp %s", csp.GetName())
} else if err == nil {
clusterWatcher.Log.Infof("deleted csp", csp.GetName())
clusterWatcher.Log.Infof("deleted csp :%s", csp.GetName())
}
continue
}
csp.Spec.Selector.MatchExpressions = common.RecommendedPolicies.MatchExpressions
_, err = clusterWatcher.Secv1Client.SecurityV1().KubeArmorClusterPolicies().Create(context.Background(), csp, metav1.CreateOptions{})
if err != nil && !metav1errors.IsAlreadyExists(err) {
clusterWatcher.Log.Warnf("error creating csp", csp.GetName())
clusterWatcher.Log.Warnf("error creating csp %s", csp.GetName())
continue
} else if metav1errors.IsAlreadyExists(err) {
pol, err := clusterWatcher.Secv1Client.SecurityV1().KubeArmorClusterPolicies().Get(context.Background(), csp.GetName(), metav1.GetOptions{})
if err != nil {
clusterWatcher.Log.Warnf("error getting csp", csp.GetName())
clusterWatcher.Log.Warnf("error getting csp %s", csp.GetName())
continue
}
if !reflect.DeepEqual(pol.Spec.Selector.MatchExpressions, common.RecommendedPolicies.MatchExpressions) {
pol.Spec.Selector.MatchExpressions = common.RecommendedPolicies.MatchExpressions
_, err := clusterWatcher.Secv1Client.SecurityV1().KubeArmorClusterPolicies().Update(context.Background(), pol, metav1.UpdateOptions{})
if err != nil {
clusterWatcher.Log.Warnf("error updating csp", csp.GetName())
clusterWatcher.Log.Warnf("error updating csp %s", csp.GetName())
continue
} else {
clusterWatcher.Log.Info("updated csp", csp.GetName())
clusterWatcher.Log.Infof("updated csp %s", csp.GetName())
}
}
} else {
@@ -1058,10 +1197,10 @@ func (clusterWatcher *ClusterWatcher) WatchRecommendedPolicies() error {
if !policy.IsDir() {
err = clusterWatcher.Secv1Client.SecurityV1().KubeArmorClusterPolicies().Delete(context.Background(), csp.GetName(), metav1.DeleteOptions{})
if err != nil && !metav1errors.IsNotFound(err) {
clusterWatcher.Log.Warnf("error deleting csp", csp.GetName())
clusterWatcher.Log.Warnf("error deleting csp %s", csp.GetName())
continue
} else {
clusterWatcher.Log.Info("deleted csp", csp.GetName())
} else if err == nil {
clusterWatcher.Log.Info("deleted csp %s", csp.GetName())
}
}
}
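The retry path above regenerates the snitch Job and prunes the volume whose mount failed before resubmitting it. Below is a minimal, self-contained sketch of that pruning step; the volume names and the failedMount value are made up for illustration and are not the operator's actual Job spec.

package main

import (
	"fmt"
	"slices"

	corev1 "k8s.io/api/core/v1"
)

func main() {
	// Hypothetical volume set; the real snitch Job builds its host-path volumes in deploySnitch.
	volumes := []corev1.Volume{
		{Name: "apparmor-path", VolumeSource: corev1.VolumeSource{
			HostPath: &corev1.HostPathVolumeSource{Path: "/etc/apparmor.d/"}}},
		{Name: "seccomp-path", VolumeSource: corev1.VolumeSource{
			HostPath: &corev1.HostPathVolumeSource{Path: "/var/lib/kubelet/seccomp"}}},
	}

	// Name recovered from the FailedMount event message (see extractVolumeFromMessage).
	failedMount := "apparmor-path"

	// Drop the volume whose mount failed so the recreated Job can start without it.
	volumes = slices.DeleteFunc(volumes, func(v corev1.Volume) bool {
		return v.Name == failedMount
	})

	for _, v := range volumes {
		fmt.Println("keeping volume:", v.Name)
	}
}

The same predicate is applied to the container's VolumeMounts in the diff so the recreated pod spec stays consistent.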
2 changes: 1 addition & 1 deletion pkg/KubeArmorOperator/internal/controller/resources.go
@@ -322,7 +322,7 @@ func deploySnitch(nodename string, runtime string) *batchv1.Job {
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: "/etc/apparmor.d/",
Type: &common.HostPathDirectoryOrCreate,
Type: &common.HostPathDirectory,
},
},
},
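The resources.go change tightens the hostPath type of the AppArmor volume: with HostPathDirectory the kubelet no longer tries to create /etc/apparmor.d/ on nodes where it is absent, so the mount fails with a FailedMount event, which is the condition checkJobStatus handles by recreating the Job without that volume. A small sketch of a hostPath volume using the stricter type follows; the volume name is illustrative and the real spec lives in deploySnitch.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

func main() {
	// HostPathDirectory requires the path to already exist on the node; if it does not,
	// the kubelet fails the mount instead of creating the directory (the behaviour of
	// HostPathDirectoryOrCreate).
	hostPathType := corev1.HostPathDirectory

	vol := corev1.Volume{
		Name: "apparmor-path", // illustrative name
		VolumeSource: corev1.VolumeSource{
			HostPath: &corev1.HostPathVolumeSource{
				Path: "/etc/apparmor.d/",
				Type: &hostPathType,
			},
		},
	}
	fmt.Printf("volume %s mounts %s with type %s\n", vol.Name, vol.HostPath.Path, *vol.HostPath.Type)
}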
