@@ -18,28 +18,37 @@ package controllerv2
18
18
19
19
import (
20
20
"context"
21
+ "errors"
22
+ "fmt"
21
23
22
24
"github.com/go-logr/logr"
25
+ "k8s.io/apimachinery/pkg/runtime/schema"
23
26
"k8s.io/client-go/tools/record"
24
27
"k8s.io/klog/v2"
28
+ "k8s.io/utils/ptr"
25
29
ctrl "sigs.k8s.io/controller-runtime"
26
30
"sigs.k8s.io/controller-runtime/pkg/client"
31
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
27
32
28
33
kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
29
- runtime "github.com/kubeflow/training-operator/pkg/runtime.v2"
34
+ jobruntimes "github.com/kubeflow/training-operator/pkg/runtime.v2"
30
35
)
31
36
37
// errorUnsupportedRuntime is the sentinel error wrapped and returned when a
// TrainJob references a runtime that has no entry in the reconciler's runtimes map.
// Callers can detect it with errors.Is.
var errorUnsupportedRuntime = errors.New("the specified runtime is not supported")
38
+
32
39
type TrainJobReconciler struct {
33
40
log logr.Logger
34
41
client client.Client
35
42
recorder record.EventRecorder
43
+ runtimes map [string ]jobruntimes.Runtime
36
44
}
37
45
38
- func NewTrainJobReconciler (client client.Client , recorder record.EventRecorder ) * TrainJobReconciler {
46
+ func NewTrainJobReconciler (client client.Client , recorder record.EventRecorder , runtimes map [ string ]jobruntimes. Runtime ) * TrainJobReconciler {
39
47
return & TrainJobReconciler {
40
48
log : ctrl .Log .WithName ("trainjob-controller" ),
41
49
client : client ,
42
50
recorder : recorder ,
51
+ runtimes : runtimes ,
43
52
}
44
53
}
45
54
@@ -52,16 +61,74 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
52
61
return ctrl.Result {}, client .IgnoreNotFound (err )
53
62
}
54
63
log := ctrl .LoggerFrom (ctx ).WithValues ("trainJob" , klog .KObj (& trainJob ))
55
- ctrl .LoggerInto (ctx , log )
64
+ ctx = ctrl .LoggerInto (ctx , log )
56
65
log .V (2 ).Info ("Reconciling TrainJob" )
66
+ if err := r .createOrUpdateObjs (ctx , & trainJob ); err != nil {
67
+ return ctrl.Result {}, err
68
+ }
69
+ // TODO (tenzen-y): Do update the status.
57
70
return ctrl.Result {}, nil
58
71
}
59
72
60
- func (r * TrainJobReconciler ) SetupWithManager (mgr ctrl.Manager , runtimes map [string ]runtime.Runtime ) error {
73
+ func (r * TrainJobReconciler ) createOrUpdateObjs (ctx context.Context , trainJob * kubeflowv2.TrainJob ) error {
74
+ log := ctrl .LoggerFrom (ctx )
75
+
76
+ runtimeRefGK := runtimeRefToGroupKind (trainJob .Spec .RuntimeRef ).String ()
77
+ runtime , ok := r .runtimes [runtimeRefGK ]
78
+ if ! ok {
79
+ return fmt .Errorf ("%w: %s" , errorUnsupportedRuntime , runtimeRefGK )
80
+ }
81
+ objs , err := runtime .NewObjects (ctx , trainJob )
82
+ if err != nil {
83
+ return err
84
+ }
85
+ for _ , obj := range objs {
86
+ var gvk schema.GroupVersionKind
87
+ if gvk , err = apiutil .GVKForObject (obj .DeepCopyObject (), r .client .Scheme ()); err != nil {
88
+ return err
89
+ }
90
+ logKeysAndValues := []any {
91
+ "groupVersionKind" , gvk .String (),
92
+ "namespace" , obj .GetNamespace (),
93
+ "name" , obj .GetName (),
94
+ }
95
+ // TODO (tenzen-y): Ideally, we should use the SSA instead of checking existence.
96
+ // Non-empty resourceVersion indicates UPDATE operation.
97
+ var creationErr error
98
+ var created bool
99
+ if obj .GetResourceVersion () == "" {
100
+ creationErr = r .client .Create (ctx , obj )
101
+ created = creationErr == nil
102
+ }
103
+ switch {
104
+ case created :
105
+ log .V (5 ).Info ("Succeeded to create object" , logKeysAndValues )
106
+ continue
107
+ case client .IgnoreAlreadyExists (creationErr ) != nil :
108
+ return creationErr
109
+ default :
110
+ // This indicates CREATE operation has not been performed or the object has already existed in the cluster.
111
+ if err = r .client .Update (ctx , obj ); err != nil {
112
+ return err
113
+ }
114
+ log .V (5 ).Info ("Succeeded to update object" , logKeysAndValues )
115
+ }
116
+ }
117
+ return nil
118
+ }
119
+
120
+ func runtimeRefToGroupKind (runtimeRef kubeflowv2.RuntimeRef ) schema.GroupKind {
121
+ return schema.GroupKind {
122
+ Group : ptr .Deref (runtimeRef .APIGroup , "" ),
123
+ Kind : ptr .Deref (runtimeRef .Kind , "" ),
124
+ }
125
+ }
126
+
127
+ func (r * TrainJobReconciler ) SetupWithManager (mgr ctrl.Manager ) error {
61
128
b := ctrl .NewControllerManagedBy (mgr ).
62
129
For (& kubeflowv2.TrainJob {})
63
- for _ , run := range runtimes {
64
- for _ , registrar := range run .EventHandlerRegistrars () {
130
+ for _ , runtime := range r . runtimes {
131
+ for _ , registrar := range runtime .EventHandlerRegistrars () {
65
132
if registrar != nil {
66
133
b = registrar (b , mgr .GetClient ())
67
134
}
0 commit comments