-
Notifications
You must be signed in to change notification settings - Fork 258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🐛 Fix HFC to execute updates #1793
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1164,6 +1164,20 @@ func (r *BareMetalHostReconciler) actionPreparing(prov provisioner.Provisioner, | |
return recordActionFailure(info, metal3api.PreparationError, provResult.ErrorMessage) | ||
} | ||
|
||
if hfcDirty && started { | ||
hfcStillDirty, err := r.saveHostFirmwareComponents(prov, info, hfc) | ||
if err != nil { | ||
return actionError{errors.Wrap(err, "could not save the host firmware components")} | ||
} | ||
|
||
if hfcStillDirty { | ||
info.log.Info("going to update the host firmware components") | ||
if err := r.Status().Update(info.ctx, hfc); err != nil { | ||
return actionError{errors.Wrap(err, "failed to update hostfirmwarecomponents status")} | ||
} | ||
} | ||
} | ||
|
||
if bmhDirty && started { | ||
info.log.Info("saving host provisioning settings") | ||
_, err := saveHostProvisioningSettings(info.host, info) | ||
|
@@ -1740,6 +1754,32 @@ func saveHostProvisioningSettings(host *metal3api.BareMetalHost, info *reconcile | |
return dirty, nil | ||
} | ||
|
||
func (r *BareMetalHostReconciler) saveHostFirmwareComponents(prov provisioner.Provisioner, info *reconcileInfo, hfc *metal3api.HostFirmwareComponents) (dirty bool, err error) { | ||
dirty = false | ||
if reflect.DeepEqual(hfc.Status.Updates, hfc.Spec.Updates) { | ||
info.log.Info("not saving hostFirmwareComponents information since is not necessary") | ||
return dirty, nil | ||
} | ||
|
||
info.log.Info("saving hostFirmwareComponents information", "spec updates", hfc.Spec.Updates, "status updates", hfc.Status.Updates) | ||
|
||
hfc.Status.Updates = make([]metal3api.FirmwareUpdate, len(hfc.Spec.Updates)) | ||
for i := range hfc.Spec.Updates { | ||
hfc.Spec.Updates[i].DeepCopyInto(&hfc.Status.Updates[i]) | ||
} | ||
iurygregory marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Retrieve new information about the firmware components stored in ironic | ||
components, err := prov.GetFirmwareComponents() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function is only going to get called once, at the beginning of manual cleaning. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, this is the problem I'm planning to fix in a separate PR; I'm still trying to figure out how. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Zane is right. But I'm also wondering why the HFC controller does not update the components afterwards. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, good point. I thought at one point we deleted that from the HFC controller, but indeed it's still there. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup, I've waited some hours to see if the HFC controller would attempt to update, but it didn't... My best guess is that I need to change the conditions when calling |
||
if err != nil { | ||
info.log.Error(err, "failed to get new information for firmware components in ironic") | ||
return dirty, err | ||
} | ||
hfc.Status.Components = components | ||
dirty = true | ||
|
||
return dirty, nil | ||
} | ||
|
||
func (r *BareMetalHostReconciler) createHostFirmwareComponents(info *reconcileInfo) error { | ||
// Check if HostFirmwareComponents already exists | ||
hfc := &metal3api.HostFirmwareComponents{} | ||
|
@@ -1755,18 +1795,31 @@ func (r *BareMetalHostReconciler) createHostFirmwareComponents(info *reconcileIn | |
|
||
// Set bmh as owner, this makes sure the resource is deleted when bmh is deleted | ||
if err = controllerutil.SetControllerReference(info.host, hfc, r.Scheme()); err != nil { | ||
iurygregory marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return errors.Wrap(err, "could not set bmh as controller") | ||
return errors.Wrap(err, "could not set bmh as controller for hostFirmwareComponents") | ||
} | ||
if err = r.Create(info.ctx, hfc); err != nil { | ||
return errors.Wrap(err, "failure creating hostFirmwareComponents resource") | ||
} | ||
|
||
info.log.Info("created new hostFirmwareComponents resource") | ||
} else { | ||
// Error reading the object | ||
return errors.Wrap(err, "could not load hostFirmwareComponents resource") | ||
return nil | ||
} | ||
// Error reading the object | ||
return errors.Wrap(err, "could not load hostFirmwareComponents resource") | ||
} | ||
// Necessary in case the CRD is created manually. | ||
|
||
if !ownerReferenceExists(info.host, hfc) { | ||
if err := controllerutil.SetControllerReference(info.host, hfc, r.Scheme()); err != nil { | ||
iurygregory marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return errors.Wrap(err, "could not set bmh as controller for hostFirmwareComponents") | ||
} | ||
if err := r.Update(info.ctx, hfc); err != nil { | ||
iurygregory marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return errors.Wrap(err, "failure updating hostFirmwareComponents resource") | ||
} | ||
|
||
return nil | ||
} | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -1851,15 +1904,6 @@ func (r *BareMetalHostReconciler) getHostFirmwareComponents(info *reconcileInfo) | |
|
||
// Check if there are Updates in the Spec that are different than the Status | ||
if meta.IsStatusConditionTrue(hfc.Status.Conditions, string(metal3api.HostFirmwareComponentsChangeDetected)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory we should check that the condition matches the current Generation (example) so we know the data is not out of date. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will do some tests in the follow-up I'm working on. |
||
// Check if the status have been populated | ||
if len(hfc.Status.Updates) == 0 { | ||
return false, nil, errors.New("host firmware status updates not available") | ||
} | ||
|
||
if len(hfc.Status.Components) == 0 { | ||
return false, nil, errors.New("host firmware status components not available") | ||
} | ||
|
||
if meta.IsStatusConditionTrue(hfc.Status.Conditions, string(metal3api.HostFirmwareComponentsValid)) { | ||
info.log.Info("hostFirmwareComponents indicating ChangeDetected", "namespacename", info.request.NamespacedName) | ||
return true, hfc, nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This saves the new status when we begin the manual cleaning, but I think I'm right in saying
clearHostProvisioningSettings()
does not clear them? So if there's a failure in actually applying this change, we won't retry. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If there is a failure, I would say we shouldn't retry: it can be bad firmware (Dell, for example, has separate firmware for each model; if I use the R750 firmware on an R640, it complains and fails).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or it could be a dropped connection or a failed write.
We shouldn't report that we've done something if we haven't done it.
If the user provides the wrong firmware we should keep trying until they realise and stop doing that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, unless we invent a way for Ironic to say "this will never work, don't try again", the current trend is to retry.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something like this? A new function
clearHostFirmwareComponentsUpdates
to be used, since I don't think we should change clearHostProvisioningSettings