@@ -137,34 +137,8 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
137
137
originalRayJobInstance := rayJobInstance .DeepCopy ()
138
138
139
139
// Perform all validations and directly fail the RayJob if any of the validation fails
140
- validationRules := []struct {
141
- validate func () error
142
- errType utils.K8sEventType
143
- message string
144
- }{
145
- {func () error { return utils .ValidateRayJobMetadata (rayJobInstance .ObjectMeta ) }, utils .InvalidRayJobMetadata , "The RayJob metadata is invalid" },
146
- {func () error { return utils .ValidateRayJobSpec (rayJobInstance ) }, utils .InvalidRayJobSpec , "The RayJob spec is invalid" },
147
- {func () error { return utils .ValidateRayJobStatus (rayJobInstance ) }, utils .InvalidRayJobStatus , "The RayJob status is invalid" },
148
- }
149
-
150
- for _ , validation := range validationRules {
151
- if err := validation .validate (); err != nil {
152
- logger .Error (err , validation .message )
153
- r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeWarning , string (validation .errType ),
154
- "%s %s/%s: %v" , validation .message , rayJobInstance .Namespace , rayJobInstance .Name , err )
155
-
156
- rayJobInstance .Status .JobStatus = rayv1 .JobStatusFailed
157
- rayJobInstance .Status .JobDeploymentStatus = rayv1 .JobDeploymentStatusFailed
158
- rayJobInstance .Status .Reason = rayv1 .ValidationFailed
159
- rayJobInstance .Status .Message = fmt .Sprintf ("%s: %v" , validation .message , err )
160
-
161
- if err = r .updateRayJobStatus (ctx , originalRayJobInstance , rayJobInstance ); err != nil {
162
- logger .Info ("Failed to update RayJob status" , "error" , err )
163
- return ctrl.Result {RequeueAfter : RayJobDefaultRequeueDuration }, err
164
- }
165
-
166
- return ctrl.Result {}, nil
167
- }
140
+ if passed , result , err := r .validateRayJobAndUpdateStatus (ctx , rayJobInstance , originalRayJobInstance ); ! passed || err != nil {
141
+ return result , err
168
142
}
169
143
170
144
logger .Info ("RayJob" , "JobStatus" , rayJobInstance .Status .JobStatus , "JobDeploymentStatus" , rayJobInstance .Status .JobDeploymentStatus , "SubmissionMode" , rayJobInstance .Spec .SubmissionMode )
@@ -475,6 +449,41 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
475
449
return ctrl.Result {RequeueAfter : RayJobDefaultRequeueDuration }, nil
476
450
}
477
451
452
+ func (r * RayJobReconciler ) validateRayJobAndUpdateStatus (ctx context.Context , rayJobInstance , originalRayJobInstance * rayv1.RayJob ) (bool , ctrl.Result , error ) {
453
+ logger := ctrl .LoggerFrom (ctx )
454
+ validationRules := []struct {
455
+ validate func () error
456
+ errType utils.K8sEventType
457
+ message string
458
+ }{
459
+ {func () error { return utils .ValidateRayJobMetadata (rayJobInstance .ObjectMeta ) }, utils .InvalidRayJobMetadata , "The RayJob metadata is invalid" },
460
+ {func () error { return utils .ValidateRayJobSpec (rayJobInstance ) }, utils .InvalidRayJobSpec , "The RayJob spec is invalid" },
461
+ {func () error { return utils .ValidateRayJobStatus (rayJobInstance ) }, utils .InvalidRayJobStatus , "The RayJob status is invalid" },
462
+ }
463
+
464
+ for _ , validation := range validationRules {
465
+ if err := validation .validate (); err != nil {
466
+ logger .Error (err , validation .message )
467
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeWarning , string (validation .errType ),
468
+ "%s %s/%s: %v" , validation .message , rayJobInstance .Namespace , rayJobInstance .Name , err )
469
+
470
+ rayJobInstance .Status .JobStatus = rayv1 .JobStatusFailed
471
+ rayJobInstance .Status .JobDeploymentStatus = rayv1 .JobDeploymentStatusFailed
472
+ rayJobInstance .Status .Reason = rayv1 .ValidationFailed
473
+ rayJobInstance .Status .Message = fmt .Sprintf ("%s: %v" , validation .message , err )
474
+
475
+ if err = r .updateRayJobStatus (ctx , originalRayJobInstance , rayJobInstance ); err != nil {
476
+ logger .Info ("Failed to update RayJob status" , "error" , err )
477
+ return false , ctrl.Result {RequeueAfter : RayJobDefaultRequeueDuration }, err
478
+ }
479
+
480
+ return false , ctrl.Result {}, nil
481
+ }
482
+ }
483
+
484
+ return true , ctrl.Result {}, nil
485
+ }
486
+
478
487
func emitRayJobMetrics (rayJobMetricsManager * metrics.RayJobMetricsManager , rayJobName , rayJobNamespace string , originalRayJobStatus , rayJobStatus rayv1.RayJobStatus ) {
479
488
if rayJobMetricsManager == nil {
480
489
return
0 commit comments