From 9337a8cf7e3d9239ebf0979b6cb277603d4f1cf2 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Thu, 16 Jan 2025 19:17:47 +1300 Subject: [PATCH 01/47] WiP --- client/checks.go | 77 +++++++++++- client/checks_test.go | 10 +- go.mod | 1 + go.sum | 2 + internals/cli/cmd_checks.go | 12 +- internals/cli/cmd_help.go | 6 +- internals/cli/cmd_start-checks.go | 71 +++++++++++ internals/cli/cmd_start-checks_test.go | 145 +++++++++++++++++++++++ internals/cli/cmd_stop-checks.go | 66 +++++++++++ internals/cli/cmd_stop-checks_test.go | 145 +++++++++++++++++++++++ internals/daemon/api.go | 8 +- internals/daemon/api_checks.go | 63 ++++++++++ internals/overlord/checkstate/manager.go | 91 +++++++++++++- internals/plan/plan.go | 31 ++++- 14 files changed, 701 insertions(+), 27 deletions(-) create mode 100644 internals/cli/cmd_start-checks.go create mode 100644 internals/cli/cmd_start-checks_test.go create mode 100644 internals/cli/cmd_stop-checks.go create mode 100644 internals/cli/cmd_stop-checks_test.go diff --git a/client/checks.go b/client/checks.go index 8e1f24911..21f3bf9ec 100644 --- a/client/checks.go +++ b/client/checks.go @@ -15,6 +15,9 @@ package client import ( + "bytes" + "encoding/json" + "fmt" "net/url" ) @@ -24,9 +27,10 @@ type ChecksOptions struct { // level. Level CheckLevel - // Names is the list of check names to query for. A check is included in - // the results if this field is nil or empty slice, or if one of the - // values in the slice is equal to the check's name. + // Names is the list of check names on which to action. For querying, a + // check is included in the results if this field is nil or empty slice. For + // all actions, a check is included in the results if one of the values in + // the slice is equal to the check's name. Names []string } @@ -43,8 +47,17 @@ const ( type CheckStatus string const ( - CheckStatusUp CheckStatus = "up" - CheckStatusDown CheckStatus = "down" + CheckStatusUp CheckStatus = "up" + CheckStatusDown CheckStatus = "down" + CheckStatusInactive CheckStatus = "inactive" +) + +// CheckStartup defines the different startup modes for a check. +type CheckStartup string + +const ( + CheckStartupEnabled CheckStartup = "enabled" + CheckStartupDisabled CheckStartup = "disabled" ) // CheckInfo holds status information for a single health check. @@ -55,8 +68,14 @@ type CheckInfo struct { // Level is this check's level, from the layer configuration. Level CheckLevel `json:"level"` + // Startup is the startup mode for the check. If it is "enabled", the check + // will be started in a Pebble replan and when Pebble starts. If it is + // "disabled", it must be started manually. + Startup CheckStartup `json:"startup"` + // Status is the status of this check: "up" if healthy, "down" if the - // number of failures has reached the configured threshold. + // number of failures has reached the configured threshold, "disabled" if + // the check is disabled. Status CheckStatus `json:"status"` // Failures is the number of times in a row this check has failed. It is @@ -90,3 +109,49 @@ func (client *Client) Checks(opts *ChecksOptions) ([]*CheckInfo, error) { } return checks, nil } + +// AutoStart starts the checks marked as "startup: enabled". opts.Names must +// be empty for this call. We ignore ops.Level for this action. +func (client *Client) AutoStartChecks(opts *ChecksOptions) (response string, err error) { + response, err = client.doMultiCheckAction("autostart", opts.Names) + return response, err +} + +// Start starts the checks named in opts.Names. We ignore ops.Level for this +// action. +func (client *Client) StartChecks(opts *ChecksOptions) (response string, err error) { + response, err = client.doMultiCheckAction("start", opts.Names) + return response, err +} + +// Stop stops the checks named in opts.Names. We ignore ops.Level for this +// action. +func (client *Client) StopChecks(opts *ChecksOptions) (response string, err error) { + response, err = client.doMultiCheckAction("stop", opts.Names) + return response, err +} + +type multiCheckActionData struct { + Action string `json:"action"` + Checks []string `json:"checks"` +} + +func (client *Client) doMultiCheckAction(actionName string, checks []string) (changeID string, err error) { + action := multiCheckActionData{ + Action: actionName, + Checks: checks, + } + data, err := json.Marshal(&action) + if err != nil { + return "", fmt.Errorf("cannot marshal multi-check action: %w", err) + } + headers := map[string]string{ + "Content-Type": "application/json", + } + + resp, err := client.doSync("POST", "/v1/checks", nil, headers, bytes.NewBuffer(data), nil) + if err != nil { + return "", err + } + return string(resp.Result), nil +} diff --git a/client/checks_test.go b/client/checks_test.go index 74c75afbe..794e47ee1 100644 --- a/client/checks_test.go +++ b/client/checks_test.go @@ -26,7 +26,8 @@ func (cs *clientSuite) TestChecksGet(c *check.C) { cs.rsp = `{ "result": [ {"name": "chk1", "status": "up"}, - {"name": "chk3", "status": "down", "failures": 42} + {"name": "chk3", "status": "down", "failures": 42}, + {"name": "chk5", "status": "inactive"}, ], "status": "OK", "status-code": 200, @@ -35,7 +36,7 @@ func (cs *clientSuite) TestChecksGet(c *check.C) { opts := client.ChecksOptions{ Level: client.AliveLevel, - Names: []string{"chk1", "chk3"}, + Names: []string{"chk1", "chk3", "chk5"}, } checks, err := cs.cli.Checks(&opts) c.Assert(err, check.IsNil) @@ -47,11 +48,14 @@ func (cs *clientSuite) TestChecksGet(c *check.C) { Name: "chk3", Status: client.CheckStatusDown, Failures: 42, + }, { + Name: "chk5", + Status: client.CheckStatusInactive, }}) c.Assert(cs.req.Method, check.Equals, "GET") c.Assert(cs.req.URL.Path, check.Equals, "/v1/checks") c.Assert(cs.req.URL.Query(), check.DeepEquals, url.Values{ "level": {"alive"}, - "names": {"chk1", "chk3"}, + "names": {"chk1", "chk3", "chk5"}, }) } diff --git a/go.mod b/go.mod index dde8646ac..218cb10c6 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/pkg/term v1.1.0 golang.org/x/sys v0.21.0 golang.org/x/term v0.21.0 + golang.org/x/text v0.16.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index 31493018c..3c18f1a25 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/internals/cli/cmd_checks.go b/internals/cli/cmd_checks.go index 9a90b90c4..b7353424f 100644 --- a/internals/cli/cmd_checks.go +++ b/internals/cli/cmd_checks.go @@ -78,16 +78,20 @@ func (cmd *cmdChecks) Execute(args []string) error { w := tabWriter() defer w.Flush() - fmt.Fprintln(w, "Check\tLevel\tStatus\tFailures\tChange") + fmt.Fprintln(w, "Check\tLevel\tStartup\tStatus\tFailures\tChange") for _, check := range checks { level := check.Level if level == client.UnsetLevel { level = "-" } - fmt.Fprintf(w, "%s\t%s\t%s\t%d/%d\t%s\n", - check.Name, level, check.Status, check.Failures, - check.Threshold, cmd.changeInfo(check)) + failures := "-" + if check.Status != client.CheckStatusInactive { + failures = fmt.Sprintf("%d/%d", check.Failures, check.Threshold) + } + fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\n", + check.Name, level, check.Startup, check.Status, failures, + cmd.changeInfo(check)) } return nil } diff --git a/internals/cli/cmd_help.go b/internals/cli/cmd_help.go index 81bdb3298..9a0a25de0 100644 --- a/internals/cli/cmd_help.go +++ b/internals/cli/cmd_help.go @@ -193,15 +193,15 @@ var HelpCategories = []HelpCategory{{ }, { Label: "Plan", Description: "view and change configuration", - Commands: []string{"add", "plan"}, + Commands: []string{"add", "plan", "replan"}, }, { Label: "Services", Description: "manage services", - Commands: []string{"services", "logs", "start", "restart", "signal", "stop", "replan"}, + Commands: []string{"services", "logs", "start", "restart", "signal", "stop"}, }, { Label: "Checks", Description: "manage health checks", - Commands: []string{"checks", "health"}, + Commands: []string{"checks", "start-checks", "stop-checks", "health"}, }, { Label: "Files", Description: "work with files and execute commands", diff --git a/internals/cli/cmd_start-checks.go b/internals/cli/cmd_start-checks.go new file mode 100644 index 000000000..c47e223f5 --- /dev/null +++ b/internals/cli/cmd_start-checks.go @@ -0,0 +1,71 @@ +// Copyright (c) 2025 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package cli + +import ( + "fmt" + "encoding/json" + + "github.com/canonical/go-flags" + + "github.com/canonical/pebble/client" +) + +const cmdStartChecksSummary = "Start one or more checks" +const cmdStartChecksDescription = ` +The start-checks command starts the configured health checks provided as +positional arguments. For any checks that are already active, the command +has no effect. +` + +type cmdStartChecks struct { + client *client.Client + + Positional struct { + Checks []string `positional-arg-name:"" required:"1"` + } `positional-args:"yes"` +} + +func init() { + AddCommand(&CmdInfo{ + Name: "start-checks", + Summary: cmdStartChecksSummary, + Description: cmdStartChecksDescription, + New: func(opts *CmdOptions) flags.Commander { + return &cmdStartChecks{client: opts.Client} + }, + }) +} + +func (cmd cmdStartChecks) Execute(args []string) error { + if len(args) > 1 { + return ErrExtraArgs + } + + checkopts := client.ChecksOptions{ + Names: cmd.Positional.Checks, + } + response, err := cmd.client.StartChecks(&checkopts) + if err != nil { + return err + } + + var result interface{} + if err := json.Unmarshal([]byte(response), &result); err != nil { + return fmt.Errorf("failed to decode JSON response: %w", err) + } + fmt.Fprintln(Stdout, result) + return nil +} diff --git a/internals/cli/cmd_start-checks_test.go b/internals/cli/cmd_start-checks_test.go new file mode 100644 index 000000000..5fc410d91 --- /dev/null +++ b/internals/cli/cmd_start-checks_test.go @@ -0,0 +1,145 @@ +// Copyright (c) 2025 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package cli_test + +import ( + "fmt" + "net/http" + + "gopkg.in/check.v1" + + "github.com/canonical/pebble/internals/cli" +) + +func (s *PebbleSuite) TestStartChecks(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/changes/28" { + c.Check(r.Method, check.Equals, "GET") + fmt.Fprintf(w, `{ + "type": "sync", + "result": { + "id": "28", + "kind": "start", + "summary": "...", + "status": "Done", + "ready": true, + "spawn-time": "2016-04-21T01:02:03Z", + "ready-time": "2016-04-21T01:02:04Z", + "tasks": [] + } + }`) + return + } + + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "start", + "checks": []interface{}{"chk1", "chk2"}, + }) + + fmt.Fprintf(w, `{ + "type": "async", + "status-code": 202, + "change": "28" + }`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2"}) + c.Assert(err, check.IsNil) + c.Assert(rest, check.HasLen, 0) + c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stderr(), check.Equals, "") +} + +func (s *PebbleSuite) TestStartChecksFails(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "start", + "checks": []interface{}{"chk1", "chk3"}, + }) + + fmt.Fprintf(w, `{"type": "error", "result": {"message": "could not foo"}}`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk3"}) + c.Assert(err, check.ErrorMatches, "could not foo") + c.Assert(rest, check.HasLen, 1) + c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stderr(), check.Equals, "") +} + +func (s *PebbleSuite) TestStartChecksNoWait(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + c.Check(r.URL.Path, check.Not(check.Equals), "/v1/changes/28") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "start", + "checks": []interface{}{"chk1", "chk2"}, + }) + + fmt.Fprintf(w, `{ + "type": "async", + "status-code": 202, + "change": "28" + }`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2", "--no-wait"}) + c.Assert(err, check.IsNil) + c.Assert(rest, check.HasLen, 0) + c.Check(s.Stdout(), check.Equals, "28\n") + c.Check(s.Stderr(), check.Equals, "") +} + +func (s *PebbleSuite) TestStartChecksFailsGetChange(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/changes/28" { + c.Check(r.Method, check.Equals, "GET") + fmt.Fprintf(w, `{"type": "error", "result": {"message": "could not bar"}}`) + return + } + + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "start", + "checks": []interface{}{"chk1", "chk2"}, + }) + + fmt.Fprintf(w, `{ + "type": "async", + "status-code": 202, + "change": "28" + }`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2"}) + c.Assert(err, check.ErrorMatches, "could not bar") + c.Assert(rest, check.HasLen, 1) + c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stderr(), check.Equals, "") +} diff --git a/internals/cli/cmd_stop-checks.go b/internals/cli/cmd_stop-checks.go new file mode 100644 index 000000000..2d9598222 --- /dev/null +++ b/internals/cli/cmd_stop-checks.go @@ -0,0 +1,66 @@ +// Copyright (c) 2025 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package cli + +import ( + "fmt" + + "github.com/canonical/go-flags" + + "github.com/canonical/pebble/client" +) + +const cmdStopChecksSummary = "Stop one or more checks" +const cmdStopChecksDescription = ` +The stop-checks command stops the configured health checks provided as +positional arguments. For any checks that are inactive, the command has +no effect. +` + +type cmdStopChecks struct { + client *client.Client + + Positional struct { + Checks []string `positional-arg-name:"" required:"1"` + } `positional-args:"yes"` +} + +func init() { + AddCommand(&CmdInfo{ + Name: "stop-checks", + Summary: cmdStopChecksSummary, + Description: cmdStopChecksDescription, + New: func(opts *CmdOptions) flags.Commander { + return &cmdStopChecks{client: opts.Client} + }, + }) +} + +func (cmd cmdStopChecks) Execute(args []string) error { + if len(args) > 1 { + return ErrExtraArgs + } + + checkopts := client.ChecksOptions{ + Names: cmd.Positional.Checks, + } + response, err := cmd.client.StopChecks(&checkopts) + if err != nil { + return err + } + + fmt.Fprintln(Stdout, response) + return nil +} diff --git a/internals/cli/cmd_stop-checks_test.go b/internals/cli/cmd_stop-checks_test.go new file mode 100644 index 000000000..8cdf5ad2f --- /dev/null +++ b/internals/cli/cmd_stop-checks_test.go @@ -0,0 +1,145 @@ +// Copyright (c) 2025 Canonical Ltd +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 3 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package cli_test + +import ( + "fmt" + "net/http" + + "gopkg.in/check.v1" + + "github.com/canonical/pebble/internals/cli" +) + +func (s *PebbleSuite) TestStopChecks(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/changes/25" { + c.Check(r.Method, check.Equals, "GET") + fmt.Fprintf(w, `{ + "type": "sync", + "result": { + "id": "25", + "kind": "stop", + "summary": "...", + "status": "Done", + "ready": true, + "spawn-time": "2016-04-21T01:02:03Z", + "ready-time": "2016-04-21T01:02:04Z", + "tasks": [] + } + }`) + return + } + + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "stop", + "checks": []interface{}{"chk1", "chk2"}, + }) + + fmt.Fprintf(w, `{ + "type": "async", + "status-code": 202, + "change": "25" + }`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2"}) + c.Assert(err, check.IsNil) + c.Assert(rest, check.HasLen, 0) + c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stderr(), check.Equals, "") +} + +func (s *PebbleSuite) TestStopChecksFails(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "stop", + "checks": []interface{}{"chk1", "chk3"}, + }) + + fmt.Fprintf(w, `{"type": "error", "result": {"message": "could not foo"}}`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk3"}) + c.Assert(err, check.ErrorMatches, "could not foo") + c.Assert(rest, check.HasLen, 1) + c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stderr(), check.Equals, "") +} + +func (s *PebbleSuite) TestStopChecksNoWait(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + c.Check(r.URL.Path, check.Not(check.Equals), "/v1/changes/25") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "stop", + "checks": []interface{}{"chk1", "chk2"}, + }) + + fmt.Fprintf(w, `{ + "type": "async", + "status-code": 202, + "change": "25" + }`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2", "--no-wait"}) + c.Assert(err, check.IsNil) + c.Assert(rest, check.HasLen, 0) + c.Check(s.Stdout(), check.Equals, "46\n") + c.Check(s.Stderr(), check.Equals, "") +} + +func (s *PebbleSuite) TestStopChecksFailsGetChange(c *check.C) { + s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/changes/25" { + c.Check(r.Method, check.Equals, "GET") + fmt.Fprintf(w, `{"type": "error", "result": {"message": "could not bar"}}`) + return + } + + c.Check(r.Method, check.Equals, "POST") + c.Check(r.URL.Path, check.Equals, "/v1/checks") + + body := DecodedRequestBody(c, r) + c.Check(body, check.DeepEquals, map[string]interface{}{ + "action": "stop", + "checks": []interface{}{"chk1", "chk2"}, + }) + + fmt.Fprintf(w, `{ + "type": "async", + "status-code": 202, + "change": "25" + }`) + }) + + rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2"}) + c.Assert(err, check.ErrorMatches, "could not bar") + c.Assert(rest, check.HasLen, 1) + c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stderr(), check.Equals, "") +} diff --git a/internals/daemon/api.go b/internals/daemon/api.go index e6c6de16c..9a899a36f 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -89,9 +89,11 @@ var API = []*Command{{ WriteAccess: AdminAccess{}, POST: v1PostSignals, }, { - Path: "/v1/checks", - ReadAccess: UserAccess{}, - GET: v1GetChecks, + Path: "/v1/checks", + ReadAccess: UserAccess{}, + WriteAccess: AdminAccess{}, + GET: v1GetChecks, + POST: v1PostChecks, }, { Path: "/v1/notices", ReadAccess: UserAccess{}, diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 10dc45b2b..0deb1dc63 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -15,6 +15,8 @@ package daemon import ( + "encoding/json" + "fmt" "net/http" "github.com/canonical/x-go/strutil" @@ -25,6 +27,7 @@ import ( type checkInfo struct { Name string `json:"name"` Level string `json:"level,omitempty"` + Startup string `json:"startup,omitempty"` Status string `json:"status"` Failures int `json:"failures,omitempty"` Threshold int `json:"threshold"` @@ -56,6 +59,7 @@ func v1GetChecks(c *Command, r *http.Request, _ *UserState) Response { info := checkInfo{ Name: check.Name, Level: string(check.Level), + Startup: string(check.Startup), Status: string(check.Status), Failures: check.Failures, Threshold: check.Threshold, @@ -66,3 +70,62 @@ func v1GetChecks(c *Command, r *http.Request, _ *UserState) Response { } return SyncResponse(infos) } + +func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { + var payload struct { + Action string `json:"action"` + Checks []string `json:"checks"` + } + + decoder := json.NewDecoder(r.Body) + if err := decoder.Decode(&payload); err != nil { + return BadRequest("cannot decode data from request body: %v", err) + } + + switch payload.Action { + case "autostart": + if len(payload.Checks) != 0 { + return BadRequest("%s accepts no check names", payload.Action) + } + default: + if len(payload.Checks) == 0 { + return BadRequest("no checks to %s provided", payload.Action) + } + } + + var err error + var checks []string + checkmgr := c.d.overlord.CheckManager() + plan := c.d.overlord.PlanManager().Plan() + + switch payload.Action { + case "start", "autostart": + checks, err = checkmgr.StartChecks(plan, payload.Checks) + case "stop": + checks, err = checkmgr.StopChecks(plan, payload.Checks) + default: + return BadRequest("action %q is unsupported", payload.Action) + } + if err != nil { + return BadRequest("cannot %s checks: %v", payload.Action, err) + } + + st := c.d.overlord.State() + st.EnsureBefore(0) // start and stop tasks right away + + // TODO: figure out what the response should be - nothing? A message? If not + // nothing, then it ought to be a JSON payload, so what's the format. It's + // all messy right now, and the cmd_*-checks.go files need to be aligned as + // well. Maybe there is an existing return object like BadRequest but good + // that would be appropriate? + + var result string + if len(checks) == 0 { + result = fmt.Sprintf("No checks needed to %s", payload.Action) + } else if len(checks) == 1 { + result = fmt.Sprintf("Queued %s for check %q", payload.Action, checks[0]) + } else { + result = fmt.Sprintf("Queued %s for check %q and %d more", payload.Action, checks[0], len(checks)-1) + } + return SyncResponse(result) +} diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 81eed861d..c72c4c9fe 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -43,6 +43,8 @@ type CheckManager struct { checksLock sync.Mutex checks map[string]CheckInfo + + plan *plan.Plan } // FailureFunc is the type of function called when a failure action is triggered. @@ -136,6 +138,13 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { // Don't restart check if its configuration hasn't changed. continue } + // We exclude any checks that have changed from startup:enabled + // to startup:disabled, because these should now be inactive and + // only started when explicitly requested. + if newConfig.Startup == plan.CheckStartupDisabled && + oldConfig.Startup == plan.CheckStartupEnabled { + continue + } // Check is in old and new plans and has been modified. newOrModified[details.Name] = true } @@ -145,10 +154,17 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { } } - // Also find checks that are new (in new plan but not in old one). + // Also find checks that are new (in new plan but not in old one) and have + // `startup` set to `enabled`. for _, config := range newPlan.Checks { if !existingChecks[config.Name] { - newOrModified[config.Name] = true + if config.Startup == plan.CheckStartupEnabled { + newOrModified[config.Name] = true + } else { + // Check is new and should be inactive - no need to start it, + // but we need to add it to the list of existing checks. + m.updateCheckInfo(config, "", 0) + } } } @@ -316,12 +332,15 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail defer m.checksLock.Unlock() status := CheckStatusUp - if failures >= config.Threshold { + if changeID == "" { + status = CheckStatusInactive + } else if failures >= config.Threshold { status = CheckStatusDown } m.checks[config.Name] = CheckInfo{ Name: config.Name, Level: config.Level, + Startup: config.Startup, Status: status, Failures: failures, Threshold: config.Threshold, @@ -340,6 +359,7 @@ func (m *CheckManager) deleteCheckInfo(name string) { type CheckInfo struct { Name string Level plan.CheckLevel + Startup plan.CheckStartup Status CheckStatus Failures int Threshold int @@ -349,10 +369,71 @@ type CheckInfo struct { type CheckStatus string const ( - CheckStatusUp CheckStatus = "up" - CheckStatusDown CheckStatus = "down" + CheckStatusUp CheckStatus = "up" + CheckStatusDown CheckStatus = "down" + CheckStatusInactive CheckStatus = "inactive" ) type checker interface { check(ctx context.Context) error } + +// StartChecks starts the specified checks by creating a new perform-checks +// change for each check that is in the current plan and not already running. +func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]string, error) { + m.state.Lock() + defer m.state.Unlock() + + var started []string + for _, name := range checks { + // If the check is not in the plan, return an error. + check, ok := currentPlan.Checks[name] + if !ok { + return nil, fmt.Errorf("check %s is not in the current plan", name) + } + info, ok := m.checks[name] + if !ok { + panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) + } + // If the check is already running, skip it. + if info.ChangeID != "" { + continue + } + changeID := performCheckChange(m.state, check) + m.updateCheckInfo(check, changeID, 0) + started = append(started, check.Name) + } + + return started, nil +} + +// StopChecks stops the specified checks by aborting the perform-check or +// recover-check change associated with the check, skipping any checks that are +// already inactive. +func (m *CheckManager) StopChecks(currentPlan *plan.Plan, checks []string) ([]string, error) { + m.state.Lock() + defer m.state.Unlock() + + var stopped []string + for _, name := range checks { + // If the check is not in the current plan, return an error. + check, ok := currentPlan.Checks[name] + if !ok { + return nil, fmt.Errorf("check %s is not in the current plan", name) + } + info, ok := m.checks[name] + if !ok { + panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) + } + // If the check is not running, skip it. + if info.ChangeID == "" { + continue + } + change := m.state.Change(info.ChangeID) + change.Abort() + m.updateCheckInfo(currentPlan.Checks[check.Name], "", 0) + stopped = append(stopped, check.Name) + } + + return stopped, nil +} diff --git a/internals/plan/plan.go b/internals/plan/plan.go index d2a9a4f3f..1fa82d308 100644 --- a/internals/plan/plan.go +++ b/internals/plan/plan.go @@ -420,9 +420,10 @@ const ( // Check specifies configuration for a single health check. type Check struct { // Basic details - Name string `yaml:"-"` - Override Override `yaml:"override,omitempty"` - Level CheckLevel `yaml:"level,omitempty"` + Name string `yaml:"-"` + Override Override `yaml:"override,omitempty"` + Level CheckLevel `yaml:"level,omitempty"` + Startup CheckStartup `yaml:"startup,omitempty"` // Common check settings Period OptionalDuration `yaml:"period,omitempty"` @@ -455,6 +456,9 @@ func (c *Check) Merge(other *Check) { if other.Level != "" { c.Level = other.Level } + if other.Startup != "" { + c.Startup = other.Startup + } if other.Period.IsSet { c.Period = other.Period } @@ -493,6 +497,15 @@ const ( ReadyLevel CheckLevel = "ready" ) +// CheckStartup defines the different startup modes for a check. +type CheckStartup string + +const ( + UnsetCheckStartup CheckStartup = "" + CheckStartupEnabled CheckStartup = "enabled" + CheckStartupDisabled CheckStartup = "disabled" +) + // HTTPCheck holds the configuration for an HTTP health check. type HTTPCheck struct { URL string `yaml:"url,omitempty"` @@ -794,6 +807,13 @@ func CombineLayers(layers ...*Layer) (*Layer, error) { } for _, check := range combined.Checks { + if check.Startup == UnsetCheckStartup { + // Services default to startup:disabled, and it would be nice if + // checks aligned with that. However, when checks were added there + // wasn't a startup field, and they were effectively startup:enabled + // so we need to keep that behaviour, at least until Pebble 2.0. + check.Startup = CheckStartupEnabled + } if !check.Period.IsSet { check.Period.Value = defaultCheckPeriod } @@ -908,6 +928,11 @@ func (layer *Layer) Validate() error { Message: fmt.Sprintf(`plan check %q level must be "alive" or "ready"`, name), } } + if check.Startup != UnsetCheckStartup && check.Startup != CheckStartupEnabled && check.Startup != CheckStartupDisabled { + return &FormatError{ + Message: fmt.Sprintf(`plan check %q startup must be "enabled" or "disabled"`, name), + } + } if check.Period.IsSet && check.Period.Value == 0 { return &FormatError{ Message: fmt.Sprintf("plan check %q period must not be zero", name), From 138c087ca9e6072fc79e3b8f8a5b04b5b3cd710b Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Thu, 16 Jan 2025 20:26:11 +1300 Subject: [PATCH 02/47] Fix older wording. --- client/checks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/checks.go b/client/checks.go index 21f3bf9ec..a7b547189 100644 --- a/client/checks.go +++ b/client/checks.go @@ -74,7 +74,7 @@ type CheckInfo struct { Startup CheckStartup `json:"startup"` // Status is the status of this check: "up" if healthy, "down" if the - // number of failures has reached the configured threshold, "disabled" if + // number of failures has reached the configured threshold, "inactive" if // the check is disabled. Status CheckStatus `json:"status"` From 692be67c0f18cad5cbfe12b6c250acb5e4fcd53d Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Thu, 16 Jan 2025 20:29:02 +1300 Subject: [PATCH 03/47] Remove module that's not required any more. --- go.mod | 1 - go.sum | 2 -- 2 files changed, 3 deletions(-) diff --git a/go.mod b/go.mod index 218cb10c6..dde8646ac 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,6 @@ require ( github.com/pkg/term v1.1.0 golang.org/x/sys v0.21.0 golang.org/x/term v0.21.0 - golang.org/x/text v0.16.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index 3c18f1a25..31493018c 100644 --- a/go.sum +++ b/go.sum @@ -23,8 +23,6 @@ golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= From 1d1744c6b0f6b915850209b620a5a040d333b732 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Thu, 16 Jan 2025 20:30:39 +1300 Subject: [PATCH 04/47] go fmt ./... --- client/checks.go | 8 ++++---- internals/cli/cmd_start-checks.go | 2 +- internals/cli/cmd_start-checks_test.go | 8 ++++---- internals/cli/cmd_stop-checks_test.go | 8 ++++---- internals/daemon/api.go | 2 +- internals/daemon/api_checks.go | 2 +- internals/overlord/checkstate/manager.go | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/client/checks.go b/client/checks.go index a7b547189..f21b80ae6 100644 --- a/client/checks.go +++ b/client/checks.go @@ -47,8 +47,8 @@ const ( type CheckStatus string const ( - CheckStatusUp CheckStatus = "up" - CheckStatusDown CheckStatus = "down" + CheckStatusUp CheckStatus = "up" + CheckStatusDown CheckStatus = "down" CheckStatusInactive CheckStatus = "inactive" ) @@ -132,8 +132,8 @@ func (client *Client) StopChecks(opts *ChecksOptions) (response string, err erro } type multiCheckActionData struct { - Action string `json:"action"` - Checks []string `json:"checks"` + Action string `json:"action"` + Checks []string `json:"checks"` } func (client *Client) doMultiCheckAction(actionName string, checks []string) (changeID string, err error) { diff --git a/internals/cli/cmd_start-checks.go b/internals/cli/cmd_start-checks.go index c47e223f5..c85f453aa 100644 --- a/internals/cli/cmd_start-checks.go +++ b/internals/cli/cmd_start-checks.go @@ -15,8 +15,8 @@ package cli import ( - "fmt" "encoding/json" + "fmt" "github.com/canonical/go-flags" diff --git a/internals/cli/cmd_start-checks_test.go b/internals/cli/cmd_start-checks_test.go index 5fc410d91..9d250ef82 100644 --- a/internals/cli/cmd_start-checks_test.go +++ b/internals/cli/cmd_start-checks_test.go @@ -48,7 +48,7 @@ func (s *PebbleSuite) TestStartChecks(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "start", + "action": "start", "checks": []interface{}{"chk1", "chk2"}, }) @@ -73,7 +73,7 @@ func (s *PebbleSuite) TestStartChecksFails(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "start", + "action": "start", "checks": []interface{}{"chk1", "chk3"}, }) @@ -95,7 +95,7 @@ func (s *PebbleSuite) TestStartChecksNoWait(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "start", + "action": "start", "checks": []interface{}{"chk1", "chk2"}, }) @@ -126,7 +126,7 @@ func (s *PebbleSuite) TestStartChecksFailsGetChange(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "start", + "action": "start", "checks": []interface{}{"chk1", "chk2"}, }) diff --git a/internals/cli/cmd_stop-checks_test.go b/internals/cli/cmd_stop-checks_test.go index 8cdf5ad2f..e8ce7adb3 100644 --- a/internals/cli/cmd_stop-checks_test.go +++ b/internals/cli/cmd_stop-checks_test.go @@ -48,7 +48,7 @@ func (s *PebbleSuite) TestStopChecks(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "stop", + "action": "stop", "checks": []interface{}{"chk1", "chk2"}, }) @@ -73,7 +73,7 @@ func (s *PebbleSuite) TestStopChecksFails(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "stop", + "action": "stop", "checks": []interface{}{"chk1", "chk3"}, }) @@ -95,7 +95,7 @@ func (s *PebbleSuite) TestStopChecksNoWait(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "stop", + "action": "stop", "checks": []interface{}{"chk1", "chk2"}, }) @@ -126,7 +126,7 @@ func (s *PebbleSuite) TestStopChecksFailsGetChange(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "stop", + "action": "stop", "checks": []interface{}{"chk1", "chk2"}, }) diff --git a/internals/daemon/api.go b/internals/daemon/api.go index 9a899a36f..4de12c1c7 100644 --- a/internals/daemon/api.go +++ b/internals/daemon/api.go @@ -93,7 +93,7 @@ var API = []*Command{{ ReadAccess: UserAccess{}, WriteAccess: AdminAccess{}, GET: v1GetChecks, - POST: v1PostChecks, + POST: v1PostChecks, }, { Path: "/v1/notices", ReadAccess: UserAccess{}, diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 0deb1dc63..845016d35 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -73,7 +73,7 @@ func v1GetChecks(c *Command, r *http.Request, _ *UserState) Response { func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { var payload struct { - Action string `json:"action"` + Action string `json:"action"` Checks []string `json:"checks"` } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index c72c4c9fe..7d4a5dc44 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -44,7 +44,7 @@ type CheckManager struct { checksLock sync.Mutex checks map[string]CheckInfo - plan *plan.Plan + plan *plan.Plan } // FailureFunc is the type of function called when a failure action is triggered. From f4a7a881f85a1e401cfbfa9e6ee2146309ca18d9 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 10:24:39 +1300 Subject: [PATCH 05/47] Add initial documentation. --- docs/reference/cli-commands.md | 57 +++++++++++++++++++++++++-- docs/reference/health-checks.md | 28 ++++++++++--- docs/reference/layer-specification.md | 4 ++ docs/specs/openapi.yaml | 41 +++++++++++++++++++ 4 files changed, 122 insertions(+), 8 deletions(-) diff --git a/docs/reference/cli-commands.md b/docs/reference/cli-commands.md index 7a48494bb..46f0ce6b8 100644 --- a/docs/reference/cli-commands.md +++ b/docs/reference/cli-commands.md @@ -8,9 +8,9 @@ The `pebble` command has the following subcommands, organised into logical group * Run: [run](#reference_pebble_run_command) * Info: [help](#reference_pebble_help_command), [version](#reference_pebble_version_command) -* Plan: [add](#reference_pebble_add_command), [plan](#reference_pebble_plan_command) -* Services: [services](#reference_pebble_services_command), [logs](#reference_pebble_logs_command), [start](#reference_pebble_start_command), [restart](#reference_pebble_restart_command), [signal](#reference_pebble_signal_command), [stop](#reference_pebble_stop_command), [replan](#reference_pebble_replan_command) -* Checks: [checks](#reference_pebble_checks_command), [health](#reference_pebble_health_command) +* Plan: [add](#reference_pebble_add_command), [plan](#reference_pebble_plan_command), [replan](#reference_pebble_replan_command) +* Services: [services](#reference_pebble_services_command), [logs](#reference_pebble_logs_command), [start](#reference_pebble_start_command), [restart](#reference_pebble_restart_command), [signal](#reference_pebble_signal_command), [stop](#reference_pebble_stop_command) +* Checks: [checks](#reference_pebble_checks_command), [start-checks](#reference_pebble_start_checks_command), [stop-checks](#reference_pebble_stop_checks_command), [health](#reference_pebble_health_command) * Files: [push](#reference_pebble_push_command), [pull](#reference_pebble_pull_command), [ls](#reference_pebble_ls_command), [mkdir](#reference_pebble_mkdir_command), [rm](#reference_pebble_rm_command), [exec](#reference_pebble_exec_command) * Changes: [changes](#reference_pebble_changes_command), [tasks](#reference_pebble_tasks_command) * Notices: [warnings](#reference_pebble_warnings_command), [okay](#reference_pebble_okay_command), [notices](#reference_pebble_notices_command), [notice](#reference_pebble_notice_command), [notify](#reference_pebble_notify_command) @@ -1007,6 +1007,31 @@ pebble start srv1 srv2 ``` +(reference_pebble_start_checks_command)= +## start-checks + +The `start-checks` command starts the checks with the provided names. + + +```{terminal} +:input: pebble start-checks --help +Usage: + pebble start-checks ... + +The start-checks command starts the configured health checks provided as +positional arguments. For any checks that are already active, the command +has no effect. +``` + + +### Examples + +To start specific checks, run `pebble start-checks` followed by one or more check names. For example, to start two checks named "chk1" and "chk2", run: + +```bash +pebble start-checks chk1 chk2 +``` + (reference_pebble_stop_command)= ## stop @@ -1040,6 +1065,32 @@ pebble stop srv1 ``` +(reference_pebble_stop_checks_command)= +## stop-checks + +The `stop-checks` command stops the checks with the provided names. + + +```{terminal} +:input: pebble stop-checks --help +Usage: + pebble stop-checks ... + +The stop-checks command stops the configured health checks provided as +positional arguments. For any checks that are inactive, the command has +no effect. +``` + + +### Examples + +To stop specific checks, use `pebble stop-checks` followed by one or more check names. The following example stops one check named "chk1": + +```bash +pebble stop-checks chk1 +``` + + (reference_pebble_tasks_command)= ## tasks diff --git a/docs/reference/health-checks.md b/docs/reference/health-checks.md index ad2f4e4c8..95716fe48 100644 --- a/docs/reference/health-checks.md +++ b/docs/reference/health-checks.md @@ -15,6 +15,8 @@ checks: # Optional level: alive | ready # Optional + startup: enabled | disabled + # Optional period: # Optional timeout: @@ -107,20 +109,22 @@ checks: test: override: replace + startup: disabled http: url: http://localhost:8080/test ``` ## Checks command -You can view check status using the `pebble checks` command. This reports the checks along with their status (`up` or `down`) and number of failures. For example: +You can view check status using the `pebble checks` command. This reports the checks along with their status (`up`, `down`, or `inactive`) and number of failures. For example: ```{terminal} :input: pebble checks -Check Level Status Failures Change -up alive up 0/1 10 -online ready down 1/3 13 (dial tcp 127.0.0.1:8000: connect: connection refused) -test - down 42/3 14 (Get "http://localhost:8080/": dial t... run "pebble tasks 14" for more) +Check Level Startup Status Failures Change +up alive enabled up 0/1 10 +online ready enabled down 1/3 13 (dial tcp 127.0.0.1:8000: connect: connection refused) +test - disabled down 42/3 14 (Get "http://localhost:8080/": dial t... run "pebble tasks 14" for more) +extra - disabled inactive - - ``` The "Failures" column shows the current number of failures since the check started failing, a slash, and the configured threshold. @@ -132,10 +136,24 @@ Health checks are implemented using two change kinds: * `perform-check`: drives the check while it's "up". The change finishes when the number of failures hits the threshold, at which point the change switches to Error status and a `recover-check` change is spawned. Each check failure records a task log. * `recover-check`: drives the check while it's "down". The change finishes when the check starts succeeding again, at which point the change switches to Done status and a new `perform-check` change is spawned. Again, each check failure records a task log. +When a check is stopped, the active `perform-check` or `recover-check` change is aborted. When a stopped (inactive) check is started, a new `perform-check` change is created for the check. + +## Start-checks and stop-checks commands + +You can stop one or more checks using the `pebble stop-checks` command. A stopped check shows in the `pebble checks` output as "inactive" status, and the check will no longer be executed until the check is started again. Stopped (inactive) checks appear in check lists but do not contribute to any overall health calculations - they behave as if the check did not exist. + +A stopped check that has `startup` set to `enabled` will be started in a `replan` operation. Stopped checks can also be manually started via the `pebble start-checks` command. + +Checks that have `startup` set to `disabled` will be added in a stopped (inactive) state. These checks will only be started when instructed by a `pebble start-checks` command. + +Including a check that is already running in a `start-checks` command, or including a check that is already stopped (inactive) in a `stop-checks` command is always safe and will simply have no effect on the check. + ## Health endpoint If the `--http` option was given when starting `pebble run`, Pebble exposes a `/v1/health` HTTP endpoint that allows a user to query the health of configured checks, optionally filtered by check level with the query string `?level=` This endpoint returns an HTTP 200 status if the checks are healthy, HTTP 502 otherwise. +Stopped (inactive) checks are ignored for health calculations. + Each check can specify a `level` of "alive" or "ready". These have semantic meaning: "alive" means the check or the service it's connected to is up and running; "ready" means it's properly accepting network traffic. These correspond to [Kubernetes "liveness" and "readiness" probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/). The tool running the Pebble server can make use of this, for example, under Kubernetes you could initialize its liveness and readiness probes to hit Pebble's `/v1/health` endpoint with `?level=alive` and `?level=ready` filters, respectively. diff --git a/docs/reference/layer-specification.md b/docs/reference/layer-specification.md index 84d0fddb8..0631e9eac 100644 --- a/docs/reference/layer-specification.md +++ b/docs/reference/layer-specification.md @@ -149,6 +149,10 @@ checks: # section in the docs for details. level: alive | ready + # (Optional) Control whether the check is started automatically when + # Pebble starts or performs a 'replan' operation. Default is "enabled". + startup: enabled | disabled + # (Optional) Check is run every time this period (time interval) # elapses. Must not be zero. Default is "10s". period: diff --git a/docs/specs/openapi.yaml b/docs/specs/openapi.yaml index 7c05882b5..748801540 100644 --- a/docs/specs/openapi.yaml +++ b/docs/specs/openapi.yaml @@ -311,6 +311,47 @@ paths: } ] } + post: + summary: Manage checks + description: Perform a check operation such as start or stop. + tags: + - checks + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + action: + type: string + description: The action to perform. + enum: ["autostart", "start", "stop"] + checks: + type: array + description: | + A list of service names. Required for "start" and "stop". + + Ignored for "autostart" (resolved automatically for "autostart" to default services). + items: + type: string + example: + {"action": "start", "checks": ["svc1"]} + responses: + "202": + description: Accepted - asynchronous operation started. + content: + application/json: + schema: + $ref: "#/components/schemas/PostServicesResponse" + example: + { + "type": "async", + "status-code": 202, + "status": "Accepted", + "change": "25", + "result": null + } /v1/exec: post: summary: Execute a command From a22b1ef52877dae186b2a18a29180c1f41d1dce9 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 10:25:32 +1300 Subject: [PATCH 06/47] Fix merge. --- client/checks.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/client/checks.go b/client/checks.go index bcaf1434f..cd557c8ad 100644 --- a/client/checks.go +++ b/client/checks.go @@ -16,9 +16,9 @@ package client import ( "bytes" + "context" "encoding/json" "fmt" - "context" "net/url" ) @@ -76,7 +76,7 @@ type CheckInfo struct { // Status is the status of this check: "up" if healthy, "down" if the // number of failures has reached the configured threshold, "inactive" if - // the check is disabled. + // the check is inactive. Status CheckStatus `json:"status"` // Failures is the number of times in a row this check has failed. It is @@ -159,9 +159,22 @@ func (client *Client) doMultiCheckAction(actionName string, checks []string) (ch "Content-Type": "application/json", } - resp, err := client.doSync("POST", "/v1/checks", nil, headers, bytes.NewBuffer(data), nil) + resp, err := client.Requester().Do(context.Background(), &RequestOptions{ + Type: SyncRequest, + Method: "POST", + Path: "/v1/checks", + Query: nil, + Headers: headers, + Body: bytes.NewBuffer(data), + }) if err != nil { return "", err } - return string(resp.Result), nil + var response string + err = resp.DecodeResult(&response) + if err != nil { + return "", err + } + + return response, nil } From e76435c62551be5517b661d9d5325af8a6549440 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 10:38:42 +1300 Subject: [PATCH 07/47] Ignore inactive checks in health calculation. --- internals/daemon/api_health.go | 3 ++- internals/daemon/api_health_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/internals/daemon/api_health.go b/internals/daemon/api_health.go index 28eef5e67..cb5a0b8d5 100644 --- a/internals/daemon/api_health.go +++ b/internals/daemon/api_health.go @@ -51,7 +51,8 @@ func v1Health(c *Command, r *http.Request, _ *UserState) Response { levelMatch := level == plan.UnsetLevel || level == check.Level || level == plan.ReadyLevel && check.Level == plan.AliveLevel // ready implies alive namesMatch := len(names) == 0 || strutil.ListContains(names, check.Name) - if levelMatch && namesMatch && check.Status != checkstate.CheckStatusUp { + // CheckStatusUp is healthy, and CheckStatusInactive is ignored. + if levelMatch && namesMatch && check.Status == checkstate.CheckStatusDown { healthy = false status = http.StatusBadGateway } diff --git a/internals/daemon/api_health_test.go b/internals/daemon/api_health_test.go index 8a67c80ce..054a168f8 100644 --- a/internals/daemon/api_health_test.go +++ b/internals/daemon/api_health_test.go @@ -54,6 +54,7 @@ func (s *healthSuite) TestHealthy(c *C) { return []*checkstate.CheckInfo{ {Name: "chk1", Status: checkstate.CheckStatusUp}, {Name: "chk2", Status: checkstate.CheckStatusUp}, + {Name: "chk3", Status: checkstate.CheckStatusInactive}, }, nil }) defer restore() @@ -72,6 +73,7 @@ func (s *healthSuite) TestUnhealthy(c *C) { {Name: "chk1", Status: checkstate.CheckStatusUp}, {Name: "chk2", Status: checkstate.CheckStatusDown}, {Name: "chk3", Status: checkstate.CheckStatusUp}, + {Name: "chk4", Status: checkstate.CheckStatusInactive}, }, nil }) defer restore() @@ -152,6 +154,7 @@ func (s *healthSuite) TestNames(c *C) { {Name: "chk1", Status: checkstate.CheckStatusDown}, {Name: "chk2", Status: checkstate.CheckStatusUp}, {Name: "chk3", Status: checkstate.CheckStatusUp}, + {Name: "chk4", Status: checkstate.CheckStatusInactive}, }, nil }) defer restore() @@ -179,6 +182,27 @@ func (s *healthSuite) TestNames(c *C) { c.Assert(response, DeepEquals, map[string]interface{}{ "healthy": true, }) + + // With only an inactive check, this is the same as no checks, so healthy. + status, response = serveHealth(c, "GET", "/v1/health?names=chk4", nil) + c.Assert(status, Equals, 200) + c.Assert(response, DeepEquals, map[string]interface{}{ + "healthy": true, + }) + + // One healthy check, one that should be ignored. + status, response = serveHealth(c, "GET", "/v1/health?names=chk2,chk4", nil) + c.Assert(status, Equals, 200) + c.Assert(response, DeepEquals, map[string]interface{}{ + "healthy": true, + }) + + // One unhealthy check, one that should be ignored. + status, response = serveHealth(c, "GET", "/v1/health?names=chk1,chk4", nil) + c.Assert(status, Equals, 502) + c.Assert(response, DeepEquals, map[string]interface{}{ + "healthy": false, + }) } func (s *healthSuite) TestBadLevel(c *C) { From 85021c296a6e6f5d41e104b968eb42b05bb7f30c Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 10:41:41 +1300 Subject: [PATCH 08/47] Clarify behaviour when a layer is added. --- docs/reference/health-checks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/health-checks.md b/docs/reference/health-checks.md index 95716fe48..2cee3a046 100644 --- a/docs/reference/health-checks.md +++ b/docs/reference/health-checks.md @@ -142,7 +142,7 @@ When a check is stopped, the active `perform-check` or `recover-check` change is You can stop one or more checks using the `pebble stop-checks` command. A stopped check shows in the `pebble checks` output as "inactive" status, and the check will no longer be executed until the check is started again. Stopped (inactive) checks appear in check lists but do not contribute to any overall health calculations - they behave as if the check did not exist. -A stopped check that has `startup` set to `enabled` will be started in a `replan` operation. Stopped checks can also be manually started via the `pebble start-checks` command. +A stopped check that has `startup` set to `enabled` will be started in a `replan` operation and when the layer is first added. Stopped checks can also be manually started via the `pebble start-checks` command. Checks that have `startup` set to `disabled` will be added in a stopped (inactive) state. These checks will only be started when instructed by a `pebble start-checks` command. From c0d483eebed895f24bb3412a9e833a01909924b4 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 10:46:36 +1300 Subject: [PATCH 09/47] Adjust error messages as per discussion in https://github.com/canonical/pebble/pull/557/ --- internals/daemon/api_changes.go | 2 +- internals/daemon/api_checks.go | 4 ++-- internals/daemon/api_services.go | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internals/daemon/api_changes.go b/internals/daemon/api_changes.go index 46e75d3cc..962833b19 100644 --- a/internals/daemon/api_changes.go +++ b/internals/daemon/api_changes.go @@ -243,7 +243,7 @@ func v1PostChange(c *Command, r *http.Request, _ *UserState) Response { } if reqData.Action != "abort" { - return BadRequest("change action %q is unsupported", reqData.Action) + return BadRequest("invalid action %q", reqData.Action) } if chg.Status().Ready() { diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 845016d35..538145770 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -89,7 +89,7 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { } default: if len(payload.Checks) == 0 { - return BadRequest("no checks to %s provided", payload.Action) + return BadRequest("must specify checks for %s action", payload.Action) } } @@ -104,7 +104,7 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { case "stop": checks, err = checkmgr.StopChecks(plan, payload.Checks) default: - return BadRequest("action %q is unsupported", payload.Action) + return BadRequest("invalid action %q", payload.Action) } if err != nil { return BadRequest("cannot %s checks: %v", payload.Action, err) diff --git a/internals/daemon/api_services.go b/internals/daemon/api_services.go index dfc045c6b..86d5a8875 100644 --- a/internals/daemon/api_services.go +++ b/internals/daemon/api_services.go @@ -95,7 +95,7 @@ func v1PostServices(c *Command, r *http.Request, _ *UserState) Response { payload.Services = services default: if len(payload.Services) == 0 { - return BadRequest("no services to %s provided", payload.Action) + return BadRequest("must specify services for %s action", payload.Action) } } @@ -182,7 +182,7 @@ func v1PostServices(c *Command, r *http.Request, _ *UserState) Response { sort.Strings(services) payload.Services = services default: - return BadRequest("action %q is unsupported", payload.Action) + return BadRequest("invalid action %q", payload.Action) } if err != nil { return BadRequest("cannot %s services: %v", payload.Action, err) From d927fb118b012c5c77de457a6892c64dbe5a5ab0 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 10:51:37 +1300 Subject: [PATCH 10/47] Get rid of autostart for checks - there doesn't really seem to be a need, and it wasn't in the spec. --- client/checks.go | 7 ------- docs/specs/openapi.yaml | 6 ++---- internals/daemon/api_checks.go | 13 +++---------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/client/checks.go b/client/checks.go index cd557c8ad..6c2e1613a 100644 --- a/client/checks.go +++ b/client/checks.go @@ -120,13 +120,6 @@ func (client *Client) Checks(opts *ChecksOptions) ([]*CheckInfo, error) { return checks, nil } -// AutoStart starts the checks marked as "startup: enabled". opts.Names must -// be empty for this call. We ignore ops.Level for this action. -func (client *Client) AutoStartChecks(opts *ChecksOptions) (response string, err error) { - response, err = client.doMultiCheckAction("autostart", opts.Names) - return response, err -} - // Start starts the checks named in opts.Names. We ignore ops.Level for this // action. func (client *Client) StartChecks(opts *ChecksOptions) (response string, err error) { diff --git a/docs/specs/openapi.yaml b/docs/specs/openapi.yaml index 748801540..66c08bb5f 100644 --- a/docs/specs/openapi.yaml +++ b/docs/specs/openapi.yaml @@ -326,13 +326,11 @@ paths: action: type: string description: The action to perform. - enum: ["autostart", "start", "stop"] + enum: ["start", "stop"] checks: type: array description: | - A list of service names. Required for "start" and "stop". - - Ignored for "autostart" (resolved automatically for "autostart" to default services). + A list of service names. Required. items: type: string example: diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 538145770..9c7bbb957 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -82,15 +82,8 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { return BadRequest("cannot decode data from request body: %v", err) } - switch payload.Action { - case "autostart": - if len(payload.Checks) != 0 { - return BadRequest("%s accepts no check names", payload.Action) - } - default: - if len(payload.Checks) == 0 { - return BadRequest("must specify checks for %s action", payload.Action) - } + if len(payload.Checks) == 0 { + return BadRequest("must specify checks for %s action", payload.Action) } var err error @@ -99,7 +92,7 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { plan := c.d.overlord.PlanManager().Plan() switch payload.Action { - case "start", "autostart": + case "start": checks, err = checkmgr.StartChecks(plan, payload.Checks) case "stop": checks, err = checkmgr.StopChecks(plan, payload.Checks) From d5651533a9163e32403fb53354a081bfcea5a2d7 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 11:37:31 +1300 Subject: [PATCH 11/47] Small doc improvement. --- client/checks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/checks.go b/client/checks.go index 6c2e1613a..e12b03c9e 100644 --- a/client/checks.go +++ b/client/checks.go @@ -25,7 +25,7 @@ import ( type ChecksOptions struct { // Level is the check level to query for. A check is included in the // results if this field is not set, or if it is equal to the check's - // level. + // level. This field is ignored for start and stop actions. Level CheckLevel // Names is the list of check names on which to action. For querying, a From 708879ef5985de6d3fb1989e577da5e9e01f7c59 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 11:41:29 +1300 Subject: [PATCH 12/47] Use a more consistent error message. --- internals/overlord/checkstate/manager.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 7d4a5dc44..837bed72e 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -389,7 +389,7 @@ func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]s // If the check is not in the plan, return an error. check, ok := currentPlan.Checks[name] if !ok { - return nil, fmt.Errorf("check %s is not in the current plan", name) + return nil, fmt.Errorf("cannot find check %q in plan", name) } info, ok := m.checks[name] if !ok { @@ -419,7 +419,7 @@ func (m *CheckManager) StopChecks(currentPlan *plan.Plan, checks []string) ([]st // If the check is not in the current plan, return an error. check, ok := currentPlan.Checks[name] if !ok { - return nil, fmt.Errorf("check %s is not in the current plan", name) + return nil, fmt.Errorf("cannot find check %q in plan", name) } info, ok := m.checks[name] if !ok { From bfc6a7f50d71dd4ce111fbbe9f7d62cbf67e3470 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 12:01:25 +1300 Subject: [PATCH 13/47] Explain how the CLI reference docs are updated. --- HACKING.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/HACKING.md b/HACKING.md index 501fac243..a5a07fca5 100644 --- a/HACKING.md +++ b/HACKING.md @@ -226,6 +226,31 @@ To pull in the latest style and dependencies from the starter pack, clone the [C - Under the `docs/` folder, run `python3 build_requirements.py`. This generates the latest `requirements.txt` under the `.sphinx/` folder. - Under the `docs/` folder, run `tox -e docs-dep` to compile a pinned requirements file for tox environments. +## Updating the CLI reference documentation + +To add a new CLI command, ensure that it is added in the list at the top of the [doc](docs/reference/cli-commands.md) in the appropriate section, and then add a new section for the details **in alphabetical order**. + +The section should look like: + +``` +(reference_pebble_{command name}_command)= +## {command name} + +The `{command name}` command is used to {describe the command}. + + +```{terminal} +:input: pebble {command name} --help +``` + +``` + +With `{command name}` replaced by the name of the command and `{describe the command}` replaced by a suitable description. + +In the `docs` directory, run `tox -e commands` to automatically update the CLI reference documentation. + +A CI workflow will fail if the CLI reference documentation does not match the actual output from Pebble. + ### Writing a great doc - Use short sentences, ideally with one or two clauses. From 131eaaedf3aa81c1ba558741be13e71aa77d35ce Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 12:01:37 +1300 Subject: [PATCH 14/47] Update the help reference documentation. --- docs/reference/cli-commands.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/reference/cli-commands.md b/docs/reference/cli-commands.md index 46f0ce6b8..7d68f06b0 100644 --- a/docs/reference/cli-commands.md +++ b/docs/reference/cli-commands.md @@ -236,9 +236,9 @@ Commands can be classified as follows: Run: run Info: help, version - Plan: add, plan - Services: services, logs, start, restart, signal, stop, replan - Checks: checks, health + Plan: add, plan, replan + Services: services, logs, start, restart, signal, stop + Checks: checks, start-checks, stop-checks, health Files: push, pull, ls, mkdir, rm, exec Changes: changes, tasks Notices: warnings, okay, notices, notice, notify From 59365b5f339fc249153087510a06ff644bdbe4cd Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 12:19:30 +1300 Subject: [PATCH 15/47] Put the default startup behaviour in the manager, so that the exported plan doesn't gain a lot of 'startup: enabled' lines. --- internals/overlord/checkstate/manager.go | 8 +++++--- internals/plan/plan.go | 11 ++--------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 837bed72e..650d4e950 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -141,8 +141,10 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { // We exclude any checks that have changed from startup:enabled // to startup:disabled, because these should now be inactive and // only started when explicitly requested. + // If the plan doesn't have an explicit startup value, it + // defaults to enabled, for backwards compatibility. if newConfig.Startup == plan.CheckStartupDisabled && - oldConfig.Startup == plan.CheckStartupEnabled { + (oldConfig.Startup == plan.CheckStartupEnabled || oldConfig.Startup == plan.CheckStartupUnknown) { continue } // Check is in old and new plans and has been modified. @@ -155,10 +157,10 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { } // Also find checks that are new (in new plan but not in old one) and have - // `startup` set to `enabled`. + // `startup` set to `enabled` (or not explicitly set). for _, config := range newPlan.Checks { if !existingChecks[config.Name] { - if config.Startup == plan.CheckStartupEnabled { + if config.Startup == plan.CheckStartupEnabled || config.Startup == plan.CheckStartupUnknown { newOrModified[config.Name] = true } else { // Check is new and should be inactive - no need to start it, diff --git a/internals/plan/plan.go b/internals/plan/plan.go index 1fa82d308..1c47a61bb 100644 --- a/internals/plan/plan.go +++ b/internals/plan/plan.go @@ -501,7 +501,7 @@ const ( type CheckStartup string const ( - UnsetCheckStartup CheckStartup = "" + CheckStartupUnknown CheckStartup = "" CheckStartupEnabled CheckStartup = "enabled" CheckStartupDisabled CheckStartup = "disabled" ) @@ -807,13 +807,6 @@ func CombineLayers(layers ...*Layer) (*Layer, error) { } for _, check := range combined.Checks { - if check.Startup == UnsetCheckStartup { - // Services default to startup:disabled, and it would be nice if - // checks aligned with that. However, when checks were added there - // wasn't a startup field, and they were effectively startup:enabled - // so we need to keep that behaviour, at least until Pebble 2.0. - check.Startup = CheckStartupEnabled - } if !check.Period.IsSet { check.Period.Value = defaultCheckPeriod } @@ -928,7 +921,7 @@ func (layer *Layer) Validate() error { Message: fmt.Sprintf(`plan check %q level must be "alive" or "ready"`, name), } } - if check.Startup != UnsetCheckStartup && check.Startup != CheckStartupEnabled && check.Startup != CheckStartupDisabled { + if check.Startup != CheckStartupUnknown && check.Startup != CheckStartupEnabled && check.Startup != CheckStartupDisabled { return &FormatError{ Message: fmt.Sprintf(`plan check %q startup must be "enabled" or "disabled"`, name), } From cf7174df04609192ba90beff80a03de8832d52d0 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 17 Jan 2025 16:14:07 +1300 Subject: [PATCH 16/47] Check that all the checks exist upfront, rather than erroring out partway through. --- internals/overlord/checkstate/manager.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 650d4e950..86ef98a74 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -386,13 +386,15 @@ func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]s m.state.Lock() defer m.state.Unlock() - var started []string + // If any check specified is not in the plan, return an error. for _, name := range checks { - // If the check is not in the plan, return an error. - check, ok := currentPlan.Checks[name] - if !ok { + if _, ok := currentPlan.Checks[name]; !ok { return nil, fmt.Errorf("cannot find check %q in plan", name) } + } + var started []string + for _, name := range checks { + check := currentPlan.Checks[name] // We know this is ok because we checked it above. info, ok := m.checks[name] if !ok { panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) @@ -416,13 +418,15 @@ func (m *CheckManager) StopChecks(currentPlan *plan.Plan, checks []string) ([]st m.state.Lock() defer m.state.Unlock() - var stopped []string + // If any check specified is not in the plan, return an error. for _, name := range checks { - // If the check is not in the current plan, return an error. - check, ok := currentPlan.Checks[name] - if !ok { + if _, ok := currentPlan.Checks[name]; !ok { return nil, fmt.Errorf("cannot find check %q in plan", name) } + } + var stopped []string + for _, name := range checks { + check := currentPlan.Checks[name] // We know this is ok because we checked it above. info, ok := m.checks[name] if !ok { panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) From 239443d88bbf45ee32d4348544527347e049ce25 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 20 Jan 2025 14:21:59 +1300 Subject: [PATCH 17/47] Fix existing tests. --- client/checks_test.go | 2 +- internals/cli/cmd_checks_test.go | 34 ++++---- internals/cli/cmd_start-checks.go | 7 +- internals/cli/cmd_start-checks_test.go | 83 +------------------ internals/cli/cmd_stop-checks_test.go | 83 +------------------ internals/daemon/api_checks.go | 6 -- internals/daemon/api_checks_test.go | 18 ++-- internals/overlord/checkstate/manager.go | 10 ++- internals/overlord/checkstate/manager_test.go | 26 +++--- 9 files changed, 57 insertions(+), 212 deletions(-) diff --git a/client/checks_test.go b/client/checks_test.go index 794e47ee1..f2758b4c5 100644 --- a/client/checks_test.go +++ b/client/checks_test.go @@ -27,7 +27,7 @@ func (cs *clientSuite) TestChecksGet(c *check.C) { "result": [ {"name": "chk1", "status": "up"}, {"name": "chk3", "status": "down", "failures": 42}, - {"name": "chk5", "status": "inactive"}, + {"name": "chk5", "status": "inactive"} ], "status": "OK", "status-code": 200, diff --git a/internals/cli/cmd_checks_test.go b/internals/cli/cmd_checks_test.go index e70889978..1e34a8cfb 100644 --- a/internals/cli/cmd_checks_test.go +++ b/internals/cli/cmd_checks_test.go @@ -35,11 +35,12 @@ func (s *PebbleSuite) TestChecks(c *check.C) { "type": "sync", "status-code": 200, "result": [ - {"name": "chk1", "status": "up", "threshold": 3, "change-id": "1"}, - {"name": "chk2", "status": "down", "failures": 1, "threshold": 1, "change-id": "2"}, - {"name": "chk3", "level": "alive", "status": "down", "failures": 42, "threshold": 3, "change-id": "3"}, - {"name": "chk4", "status": "down", "failures": 6, "threshold": 2, "change-id": "4"}, - {"name": "chk5", "status": "down", "failures": 3, "threshold": 2, "change-id": "5"} + {"name": "chk1", "startup": "enabled", "status": "up", "threshold": 3, "change-id": "1"}, + {"name": "chk2", "startup": "enabled", "status": "down", "failures": 1, "threshold": 1, "change-id": "2"}, + {"name": "chk3", "startup": "enabled", "level": "alive", "status": "down", "failures": 42, "threshold": 3, "change-id": "3"}, + {"name": "chk4", "startup": "enabled", "status": "down", "failures": 6, "threshold": 2, "change-id": "4"}, + {"name": "chk5", "startup": "enabled", "status": "down", "failures": 3, "threshold": 2, "change-id": "5"}, + {"name": "chk6", "startup": "disabled", "status": "inactive", "failures": 0, "threshold": 3, "change-id": ""} ] }`) case "/v1/changes/2": @@ -89,12 +90,13 @@ func (s *PebbleSuite) TestChecks(c *check.C) { c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) c.Check(s.Stdout(), check.Equals, ` -Check Level Status Failures Change -chk1 - up 0/3 1 -chk2 - down 1/1 2 (second) -chk3 alive down 42/3 3 (cannot get change 3) -chk4 - down 6/2 4 (Get "http://localhost:8000/": dial tc... run "pebble tasks 4" for more) -chk5 - down 3/2 5 (error with some\nline breaks\nin it\n) +Check Level Startup Status Failures Change +chk1 - enabled up 0/3 1 +chk2 - enabled down 1/1 2 (second) +chk3 alive enabled down 42/3 3 (cannot get change 3) +chk4 - enabled down 6/2 4 (Get "http://localhost:8000/": dial tc... run "pebble tasks 4" for more) +chk5 - enabled down 3/2 5 (error with some\nline breaks\nin it\n) +chk6 - disabled inactive - - `[1:]) c.Check(s.Stderr(), check.Equals, "") } @@ -144,8 +146,8 @@ func (s *PebbleSuite) TestChecksFiltering(c *check.C) { "type": "sync", "status-code": 200, "result": [ - {"name": "chk1", "status": "up", "threshold": 3}, - {"name": "chk3", "level": "alive", "status": "down", "failures": 42, "threshold": 3} + {"name": "chk1", "startup": "enabled", "status": "up", "threshold": 3}, + {"name": "chk3", "startup": "enabled", "level": "alive", "status": "down", "failures": 42, "threshold": 3} ] }`) }) @@ -153,9 +155,9 @@ func (s *PebbleSuite) TestChecksFiltering(c *check.C) { c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) c.Check(s.Stdout(), check.Equals, ` -Check Level Status Failures Change -chk1 - up 0/3 - -chk3 alive down 42/3 - +Check Level Startup Status Failures Change +chk1 - enabled up 0/3 - +chk3 alive enabled down 42/3 - `[1:]) c.Check(s.Stderr(), check.Equals, "") } diff --git a/internals/cli/cmd_start-checks.go b/internals/cli/cmd_start-checks.go index c85f453aa..58e6095f0 100644 --- a/internals/cli/cmd_start-checks.go +++ b/internals/cli/cmd_start-checks.go @@ -15,7 +15,6 @@ package cli import ( - "encoding/json" "fmt" "github.com/canonical/go-flags" @@ -62,10 +61,6 @@ func (cmd cmdStartChecks) Execute(args []string) error { return err } - var result interface{} - if err := json.Unmarshal([]byte(response), &result); err != nil { - return fmt.Errorf("failed to decode JSON response: %w", err) - } - fmt.Fprintln(Stdout, result) + fmt.Fprintln(Stdout, response) return nil } diff --git a/internals/cli/cmd_start-checks_test.go b/internals/cli/cmd_start-checks_test.go index 9d250ef82..63502ca8e 100644 --- a/internals/cli/cmd_start-checks_test.go +++ b/internals/cli/cmd_start-checks_test.go @@ -25,24 +25,6 @@ import ( func (s *PebbleSuite) TestStartChecks(c *check.C) { s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/v1/changes/28" { - c.Check(r.Method, check.Equals, "GET") - fmt.Fprintf(w, `{ - "type": "sync", - "result": { - "id": "28", - "kind": "start", - "summary": "...", - "status": "Done", - "ready": true, - "spawn-time": "2016-04-21T01:02:03Z", - "ready-time": "2016-04-21T01:02:04Z", - "tasks": [] - } - }`) - return - } - c.Check(r.Method, check.Equals, "POST") c.Check(r.URL.Path, check.Equals, "/v1/checks") @@ -53,16 +35,16 @@ func (s *PebbleSuite) TestStartChecks(c *check.C) { }) fmt.Fprintf(w, `{ - "type": "async", - "status-code": 202, - "change": "28" + "type": "sync", + "status-code": 200, + "result": "Queued \"start\" for check chk1 and 1 more" }`) }) rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2"}) c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) - c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stdout(), check.Equals, "Queued \"start\" for check chk1 and 1 more\n") c.Check(s.Stderr(), check.Equals, "") } @@ -86,60 +68,3 @@ func (s *PebbleSuite) TestStartChecksFails(c *check.C) { c.Check(s.Stdout(), check.Equals, "") c.Check(s.Stderr(), check.Equals, "") } - -func (s *PebbleSuite) TestStartChecksNoWait(c *check.C) { - s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { - c.Check(r.Method, check.Equals, "POST") - c.Check(r.URL.Path, check.Equals, "/v1/checks") - c.Check(r.URL.Path, check.Not(check.Equals), "/v1/changes/28") - - body := DecodedRequestBody(c, r) - c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "start", - "checks": []interface{}{"chk1", "chk2"}, - }) - - fmt.Fprintf(w, `{ - "type": "async", - "status-code": 202, - "change": "28" - }`) - }) - - rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2", "--no-wait"}) - c.Assert(err, check.IsNil) - c.Assert(rest, check.HasLen, 0) - c.Check(s.Stdout(), check.Equals, "28\n") - c.Check(s.Stderr(), check.Equals, "") -} - -func (s *PebbleSuite) TestStartChecksFailsGetChange(c *check.C) { - s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/v1/changes/28" { - c.Check(r.Method, check.Equals, "GET") - fmt.Fprintf(w, `{"type": "error", "result": {"message": "could not bar"}}`) - return - } - - c.Check(r.Method, check.Equals, "POST") - c.Check(r.URL.Path, check.Equals, "/v1/checks") - - body := DecodedRequestBody(c, r) - c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "start", - "checks": []interface{}{"chk1", "chk2"}, - }) - - fmt.Fprintf(w, `{ - "type": "async", - "status-code": 202, - "change": "28" - }`) - }) - - rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2"}) - c.Assert(err, check.ErrorMatches, "could not bar") - c.Assert(rest, check.HasLen, 1) - c.Check(s.Stdout(), check.Equals, "") - c.Check(s.Stderr(), check.Equals, "") -} diff --git a/internals/cli/cmd_stop-checks_test.go b/internals/cli/cmd_stop-checks_test.go index e8ce7adb3..1d3e360ec 100644 --- a/internals/cli/cmd_stop-checks_test.go +++ b/internals/cli/cmd_stop-checks_test.go @@ -25,24 +25,6 @@ import ( func (s *PebbleSuite) TestStopChecks(c *check.C) { s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/v1/changes/25" { - c.Check(r.Method, check.Equals, "GET") - fmt.Fprintf(w, `{ - "type": "sync", - "result": { - "id": "25", - "kind": "stop", - "summary": "...", - "status": "Done", - "ready": true, - "spawn-time": "2016-04-21T01:02:03Z", - "ready-time": "2016-04-21T01:02:04Z", - "tasks": [] - } - }`) - return - } - c.Check(r.Method, check.Equals, "POST") c.Check(r.URL.Path, check.Equals, "/v1/checks") @@ -53,16 +35,16 @@ func (s *PebbleSuite) TestStopChecks(c *check.C) { }) fmt.Fprintf(w, `{ - "type": "async", - "status-code": 202, - "change": "25" + "type": "sync", + "status-code": 200, + "result": "Queued \"stop\" for check chk1 and 1 more" }`) }) rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2"}) c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) - c.Check(s.Stdout(), check.Equals, "") + c.Check(s.Stdout(), check.Equals, "Queued \"stop\" for check chk1 and 1 more\n") c.Check(s.Stderr(), check.Equals, "") } @@ -86,60 +68,3 @@ func (s *PebbleSuite) TestStopChecksFails(c *check.C) { c.Check(s.Stdout(), check.Equals, "") c.Check(s.Stderr(), check.Equals, "") } - -func (s *PebbleSuite) TestStopChecksNoWait(c *check.C) { - s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { - c.Check(r.Method, check.Equals, "POST") - c.Check(r.URL.Path, check.Equals, "/v1/checks") - c.Check(r.URL.Path, check.Not(check.Equals), "/v1/changes/25") - - body := DecodedRequestBody(c, r) - c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "stop", - "checks": []interface{}{"chk1", "chk2"}, - }) - - fmt.Fprintf(w, `{ - "type": "async", - "status-code": 202, - "change": "25" - }`) - }) - - rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2", "--no-wait"}) - c.Assert(err, check.IsNil) - c.Assert(rest, check.HasLen, 0) - c.Check(s.Stdout(), check.Equals, "46\n") - c.Check(s.Stderr(), check.Equals, "") -} - -func (s *PebbleSuite) TestStopChecksFailsGetChange(c *check.C) { - s.RedirectClientToTestServer(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/v1/changes/25" { - c.Check(r.Method, check.Equals, "GET") - fmt.Fprintf(w, `{"type": "error", "result": {"message": "could not bar"}}`) - return - } - - c.Check(r.Method, check.Equals, "POST") - c.Check(r.URL.Path, check.Equals, "/v1/checks") - - body := DecodedRequestBody(c, r) - c.Check(body, check.DeepEquals, map[string]interface{}{ - "action": "stop", - "checks": []interface{}{"chk1", "chk2"}, - }) - - fmt.Fprintf(w, `{ - "type": "async", - "status-code": 202, - "change": "25" - }`) - }) - - rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2"}) - c.Assert(err, check.ErrorMatches, "could not bar") - c.Assert(rest, check.HasLen, 1) - c.Check(s.Stdout(), check.Equals, "") - c.Check(s.Stderr(), check.Equals, "") -} diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 9c7bbb957..1fa5f2c31 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -106,12 +106,6 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { st := c.d.overlord.State() st.EnsureBefore(0) // start and stop tasks right away - // TODO: figure out what the response should be - nothing? A message? If not - // nothing, then it ought to be a JSON payload, so what's the format. It's - // all messy right now, and the cmd_*-checks.go files need to be aligned as - // well. Maybe there is an existing return object like BadRequest but good - // that would be appropriate? - var result string if len(checks) == 0 { result = fmt.Sprintf("No checks needed to %s", payload.Action) diff --git a/internals/daemon/api_checks_test.go b/internals/daemon/api_checks_test.go index 4eb35b49b..e3bd30614 100644 --- a/internals/daemon/api_checks_test.go +++ b/internals/daemon/api_checks_test.go @@ -57,9 +57,9 @@ checks: c.Check(rsp.Status, Equals, 200) c.Check(rsp.Type, Equals, ResponseTypeSync) expected := []interface{}{ - map[string]interface{}{"name": "chk1", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, - map[string]interface{}{"name": "chk2", "status": "up", "level": "alive", "threshold": 3.0, "change-id": "C1"}, - map[string]interface{}{"name": "chk3", "status": "up", "threshold": 3.0, "change-id": "C2"}, + map[string]interface{}{"name": "chk1", "startup": "enabled", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, + map[string]interface{}{"name": "chk2", "startup": "enabled", "status": "up", "level": "alive", "threshold": 3.0, "change-id": "C1"}, + map[string]interface{}{"name": "chk3", "startup": "enabled", "status": "up", "threshold": 3.0, "change-id": "C2"}, } if reflect.DeepEqual(body["result"], expected) { break @@ -76,8 +76,8 @@ checks: c.Check(rsp.Status, Equals, 200) c.Check(rsp.Type, Equals, ResponseTypeSync) c.Check(body["result"], DeepEquals, []interface{}{ - map[string]interface{}{"name": "chk1", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, - map[string]interface{}{"name": "chk3", "status": "up", "threshold": 3.0, "change-id": "C1"}, + map[string]interface{}{"name": "chk1", "startup": "enabled", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, + map[string]interface{}{"name": "chk3", "startup": "enabled", "status": "up", "threshold": 3.0, "change-id": "C1"}, }) // Request with names filter (comma-separated values) @@ -85,8 +85,8 @@ checks: c.Check(rsp.Status, Equals, 200) c.Check(rsp.Type, Equals, ResponseTypeSync) c.Check(body["result"], DeepEquals, []interface{}{ - map[string]interface{}{"name": "chk1", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, - map[string]interface{}{"name": "chk3", "status": "up", "threshold": 3.0, "change-id": "C1"}, + map[string]interface{}{"name": "chk1", "startup": "enabled", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, + map[string]interface{}{"name": "chk3", "startup": "enabled", "status": "up", "threshold": 3.0, "change-id": "C1"}, }) // Request with level filter @@ -94,7 +94,7 @@ checks: c.Check(rsp.Status, Equals, 200) c.Check(rsp.Type, Equals, ResponseTypeSync) c.Check(body["result"], DeepEquals, []interface{}{ - map[string]interface{}{"name": "chk2", "status": "up", "level": "alive", "threshold": 3.0, "change-id": "C0"}, + map[string]interface{}{"name": "chk2", "startup": "enabled", "status": "up", "level": "alive", "threshold": 3.0, "change-id": "C0"}, }) // Request with names and level filters @@ -102,7 +102,7 @@ checks: c.Check(rsp.Status, Equals, 200) c.Check(rsp.Type, Equals, ResponseTypeSync) c.Check(body["result"], DeepEquals, []interface{}{ - map[string]interface{}{"name": "chk1", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, + map[string]interface{}{"name": "chk1", "startup": "enabled", "status": "up", "level": "ready", "threshold": 3.0, "change-id": "C0"}, }) } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 86ef98a74..e5bee09d9 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -339,10 +339,14 @@ func (m *CheckManager) updateCheckInfo(config *plan.Check, changeID string, fail } else if failures >= config.Threshold { status = CheckStatusDown } + startup := config.Startup + if startup == plan.CheckStartupUnknown { + startup = plan.CheckStartupEnabled + } m.checks[config.Name] = CheckInfo{ Name: config.Name, Level: config.Level, - Startup: config.Startup, + Startup: startup, Status: status, Failures: failures, Threshold: config.Threshold, @@ -394,7 +398,7 @@ func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]s } var started []string for _, name := range checks { - check := currentPlan.Checks[name] // We know this is ok because we checked it above. + check := currentPlan.Checks[name] // We know this is ok because we checked it above. info, ok := m.checks[name] if !ok { panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) @@ -426,7 +430,7 @@ func (m *CheckManager) StopChecks(currentPlan *plan.Plan, checks []string) ([]st } var stopped []string for _, name := range checks { - check := currentPlan.Checks[name] // We know this is ok because we checked it above. + check := currentPlan.Checks[name] // We know this is ok because we checked it above. info, ok := m.checks[name] if !ok { panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index a455c6a0f..e12b3cdc8 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -102,9 +102,9 @@ func (s *ManagerSuite) TestChecks(c *C) { // Wait for expected checks to be started. waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk1", Status: "up", Threshold: 3}, - {Name: "chk2", Status: "up", Level: "alive", Threshold: 3}, - {Name: "chk3", Status: "up", Level: "ready", Threshold: 3}, + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "enabled", Status: "up", Level: "alive", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Level: "ready", Threshold: 3}, }) // Re-configuring should update checks. @@ -121,7 +121,7 @@ func (s *ManagerSuite) TestChecks(c *C) { // Wait for checks to be updated. waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk4", Status: "up", Threshold: 3}, + {Name: "chk4", Startup: "enabled", Status: "up", Threshold: 3}, }) } @@ -342,9 +342,9 @@ func (s *ManagerSuite) TestPlanChangedSmarts(c *C) { }) waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk1", Status: "up", Threshold: 3}, - {Name: "chk2", Status: "up", Threshold: 3}, - {Name: "chk3", Status: "up", Threshold: 3}, + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, }) checks, err := s.manager.Checks() c.Assert(err, IsNil) @@ -373,8 +373,8 @@ func (s *ManagerSuite) TestPlanChangedSmarts(c *C) { }) waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk1", Status: "up", Threshold: 3}, - {Name: "chk2", Status: "up", Threshold: 6}, + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "enabled", Status: "up", Threshold: 6}, }) checks, err = s.manager.Checks() c.Assert(err, IsNil) @@ -426,8 +426,8 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { s.manager.PlanChanged(origPlan) waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk1", Status: "up", Threshold: 3}, - {Name: "chk2", Status: "up", Threshold: 3}, + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "enabled", Status: "up", Threshold: 3}, }) checks, err := s.manager.Checks() c.Assert(err, IsNil) @@ -454,8 +454,8 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { }) waitChecks(c, s.manager, []*checkstate.CheckInfo{ - {Name: "chk1", Status: "up", Threshold: 3}, - {Name: "chk2", Status: "up", Threshold: 3}, + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "enabled", Status: "up", Threshold: 3}, }) checks, err = s.manager.Checks() c.Assert(err, IsNil) From 5e641a2de8542d0716782c28d4bec8715606e034 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 20 Jan 2025 20:30:46 +1300 Subject: [PATCH 18/47] Add a temporary solution to starting checks in a replan. --- internals/daemon/api_services.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/internals/daemon/api_services.go b/internals/daemon/api_services.go index 86d5a8875..4335b0d71 100644 --- a/internals/daemon/api_services.go +++ b/internals/daemon/api_services.go @@ -99,6 +99,31 @@ func v1PostServices(c *Command, r *http.Request, _ *UserState) Response { } } + // TODO (ping @benhoyt) This is clearly the wrong place for this - it's out + // of the switch below (to avoid deadlocking on the state lock) but even + // more so, it doesn't seem correct for the services API to be aware of the + // check manager's need to be notified. + // It seems like the right thing to do would be to trigger a PlanChanged + // (the doc says that it might happen without the plan changing, which is + // the case here - but alternatively there could be a ReplanRequested + // listener that works similarly). However, notifying the PlanChanged + // listeners is a planstate manager thing, and planstate seems to be more + // "build the layers into a plan" than actually taking action on the plan. + // Maybe planstate should gain a Replan method that does call the listeners + // but then this code would be calling that method and it's not clear what + // it would do *other* that calling the listeners. planstate's + // callChangeListeners could get a public interface, but it still seems like + // it's around the wrong way for the services API to be calling it. + // If you don't instinctively know the right way to do this, I'm happy to + // have a call to discuss. I missed this earlier (all my replan testing also + // had changes to the plan happening so services were started up again), so + // have only had a small amount of time to think it over/try things out. + if payload.Action == "replan" { + checkmgr := c.d.overlord.CheckManager() + plan := c.d.overlord.PlanManager().Plan() + defer checkmgr.PlanChanged(plan) + } + st := c.d.overlord.State() st.Lock() defer st.Unlock() @@ -181,6 +206,7 @@ func v1PostServices(c *Command, r *http.Request, _ *UserState) Response { } sort.Strings(services) payload.Services = services + default: return BadRequest("invalid action %q", payload.Action) } From f3c368cdde2d1562a68a2c1dc622fe6845b6cb7a Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 22 Jan 2025 10:34:27 +1300 Subject: [PATCH 19/47] Update client/checks.go Co-authored-by: Ben Hoyt --- client/checks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/checks.go b/client/checks.go index e12b03c9e..60b9c4998 100644 --- a/client/checks.go +++ b/client/checks.go @@ -75,7 +75,7 @@ type CheckInfo struct { Startup CheckStartup `json:"startup"` // Status is the status of this check: "up" if healthy, "down" if the - // number of failures has reached the configured threshold, "inactive" if + // number of failures has reached the configured threshold, or "inactive" if // the check is inactive. Status CheckStatus `json:"status"` From 24c4118041fa7fcb0d4169ae851203f2d6fe8421 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 22 Jan 2025 10:34:49 +1300 Subject: [PATCH 20/47] Update client/checks.go Co-authored-by: Ben Hoyt --- client/checks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/checks.go b/client/checks.go index 60b9c4998..e8d46c104 100644 --- a/client/checks.go +++ b/client/checks.go @@ -139,7 +139,7 @@ type multiCheckActionData struct { Checks []string `json:"checks"` } -func (client *Client) doMultiCheckAction(actionName string, checks []string) (changeID string, err error) { +func (client *Client) doMultiCheckAction(actionName string, checks []string) (response string, err error) { action := multiCheckActionData{ Action: actionName, Checks: checks, From a0abe430084af924f22b7e9e07d8016d8342649b Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 22 Jan 2025 10:46:40 +1300 Subject: [PATCH 21/47] Small tweaks, per review. --- HACKING.md | 2 ++ client/checks.go | 19 +++++++++++-------- internals/cli/cmd_start-checks.go | 2 +- internals/cli/cmd_stop-checks.go | 2 +- internals/daemon/api_checks.go | 4 ++-- internals/daemon/api_services.go | 2 +- internals/overlord/checkstate/manager.go | 9 ++++----- 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/HACKING.md b/HACKING.md index a5a07fca5..b73902beb 100644 --- a/HACKING.md +++ b/HACKING.md @@ -251,6 +251,8 @@ In the `docs` directory, run `tox -e commands` to automatically update the CLI r A CI workflow will fail if the CLI reference documentation does not match the actual output from Pebble. +Note that the [OpenAPI spec](docs/specs/openapi.yaml) also needs to be manually updated. + ### Writing a great doc - Use short sentences, ideally with one or two clauses. diff --git a/client/checks.go b/client/checks.go index e8d46c104..f074ff9bc 100644 --- a/client/checks.go +++ b/client/checks.go @@ -25,13 +25,17 @@ import ( type ChecksOptions struct { // Level is the check level to query for. A check is included in the // results if this field is not set, or if it is equal to the check's - // level. This field is ignored for start and stop actions. + // level. Level CheckLevel - // Names is the list of check names on which to action. For querying, a - // check is included in the results if this field is nil or empty slice. For - // all actions, a check is included in the results if one of the values in - // the slice is equal to the check's name. + // Names is the list of check names to query for. A check is included in + // the results if this field is nil or empty slice, or if one of the + // values in the slice is equal to the check's name. + Names []string +} + +type ChecksActionOptions struct { + // Names is the list of check names on which to perform the action. Names []string } @@ -122,14 +126,14 @@ func (client *Client) Checks(opts *ChecksOptions) ([]*CheckInfo, error) { // Start starts the checks named in opts.Names. We ignore ops.Level for this // action. -func (client *Client) StartChecks(opts *ChecksOptions) (response string, err error) { +func (client *Client) StartChecks(opts *ChecksActionOptions) (response string, err error) { response, err = client.doMultiCheckAction("start", opts.Names) return response, err } // Stop stops the checks named in opts.Names. We ignore ops.Level for this // action. -func (client *Client) StopChecks(opts *ChecksOptions) (response string, err error) { +func (client *Client) StopChecks(opts *ChecksActionOptions) (response string, err error) { response, err = client.doMultiCheckAction("stop", opts.Names) return response, err } @@ -156,7 +160,6 @@ func (client *Client) doMultiCheckAction(actionName string, checks []string) (re Type: SyncRequest, Method: "POST", Path: "/v1/checks", - Query: nil, Headers: headers, Body: bytes.NewBuffer(data), }) diff --git a/internals/cli/cmd_start-checks.go b/internals/cli/cmd_start-checks.go index 58e6095f0..1de2b768c 100644 --- a/internals/cli/cmd_start-checks.go +++ b/internals/cli/cmd_start-checks.go @@ -53,7 +53,7 @@ func (cmd cmdStartChecks) Execute(args []string) error { return ErrExtraArgs } - checkopts := client.ChecksOptions{ + checkopts := client.ChecksActionOptions{ Names: cmd.Positional.Checks, } response, err := cmd.client.StartChecks(&checkopts) diff --git a/internals/cli/cmd_stop-checks.go b/internals/cli/cmd_stop-checks.go index 2d9598222..18e0091e6 100644 --- a/internals/cli/cmd_stop-checks.go +++ b/internals/cli/cmd_stop-checks.go @@ -53,7 +53,7 @@ func (cmd cmdStopChecks) Execute(args []string) error { return ErrExtraArgs } - checkopts := client.ChecksOptions{ + checkopts := client.ChecksActionOptions{ Names: cmd.Positional.Checks, } response, err := cmd.client.StopChecks(&checkopts) diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 1fa5f2c31..3b4305ac4 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -86,11 +86,11 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { return BadRequest("must specify checks for %s action", payload.Action) } - var err error - var checks []string checkmgr := c.d.overlord.CheckManager() plan := c.d.overlord.PlanManager().Plan() + var err error + var checks []string switch payload.Action { case "start": checks, err = checkmgr.StartChecks(plan, payload.Checks) diff --git a/internals/daemon/api_services.go b/internals/daemon/api_services.go index 4335b0d71..9ec7f83de 100644 --- a/internals/daemon/api_services.go +++ b/internals/daemon/api_services.go @@ -111,7 +111,7 @@ func v1PostServices(c *Command, r *http.Request, _ *UserState) Response { // "build the layers into a plan" than actually taking action on the plan. // Maybe planstate should gain a Replan method that does call the listeners // but then this code would be calling that method and it's not clear what - // it would do *other* that calling the listeners. planstate's + // it would do *other* that calling the listeners. planstate's // callChangeListeners could get a public interface, but it still seems like // it's around the wrong way for the services API to be calling it. // If you don't instinctively know the right way to do this, I'm happy to diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index e5bee09d9..16fe80c78 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -384,8 +384,8 @@ type checker interface { check(ctx context.Context) error } -// StartChecks starts the specified checks by creating a new perform-checks -// change for each check that is in the current plan and not already running. +// StartChecks starts the specified checks, if not already running, and returns +// the checks that did need to be started. func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]string, error) { m.state.Lock() defer m.state.Unlock() @@ -415,9 +415,8 @@ func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]s return started, nil } -// StopChecks stops the specified checks by aborting the perform-check or -// recover-check change associated with the check, skipping any checks that are -// already inactive. +// StopChecks stops the specified checks, if currently running, and returns +// the checks that did need to be stopped. func (m *CheckManager) StopChecks(currentPlan *plan.Plan, checks []string) ([]string, error) { m.state.Lock() defer m.state.Unlock() From 5264aedd8ccd2b602dcc64c7fa5508138037bc40 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 22 Jan 2025 10:57:56 +1300 Subject: [PATCH 22/47] Formatting fix. --- HACKING.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/HACKING.md b/HACKING.md index b73902beb..56eb6de44 100644 --- a/HACKING.md +++ b/HACKING.md @@ -5,6 +5,7 @@ - [Using Curl to hit the API](#using-curl-to-hit-the-api) - [Code style](#code-style) - [Running the tests](#running-the-tests) +- [Docs](#docs) - [Creating a release](#creating-a-release) Hacking on Pebble is easy. It's written in Go, so install or [download](https://golang.org/dl/) a copy of the latest version of Go. Pebble uses [Go modules](https://golang.org/ref/mod) for managing dependencies, so all of the standard Go tooling just works. @@ -226,7 +227,7 @@ To pull in the latest style and dependencies from the starter pack, clone the [C - Under the `docs/` folder, run `python3 build_requirements.py`. This generates the latest `requirements.txt` under the `.sphinx/` folder. - Under the `docs/` folder, run `tox -e docs-dep` to compile a pinned requirements file for tox environments. -## Updating the CLI reference documentation +### Updating the CLI reference documentation To add a new CLI command, ensure that it is added in the list at the top of the [doc](docs/reference/cli-commands.md) in the appropriate section, and then add a new section for the details **in alphabetical order**. @@ -239,9 +240,9 @@ The section should look like: The `{command name}` command is used to {describe the command}. -```{terminal} +``````{terminal} :input: pebble {command name} --help -``` +`````` ``` From 6dea00f6d6a66c87fc14fa114af45a55bc354e53 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 22 Jan 2025 11:01:15 +1300 Subject: [PATCH 23/47] Formatting fix. --- HACKING.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HACKING.md b/HACKING.md index 56eb6de44..59032f53f 100644 --- a/HACKING.md +++ b/HACKING.md @@ -233,18 +233,18 @@ To add a new CLI command, ensure that it is added in the list at the top of the The section should look like: -``` + (reference_pebble_{command name}_command)= ## {command name} The `{command name}` command is used to {describe the command}. -``````{terminal} +```{terminal} :input: pebble {command name} --help -`````` - ``` + + With `{command name}` replaced by the name of the command and `{describe the command}` replaced by a suitable description. From 3b755b2409b11b4898bc24c0a08a0c01393c9318 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 22 Jan 2025 11:55:11 +1300 Subject: [PATCH 24/47] Try another way to fix the triple backticks in triple backticks issue. --- HACKING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HACKING.md b/HACKING.md index 59032f53f..74812a6e1 100644 --- a/HACKING.md +++ b/HACKING.md @@ -233,7 +233,7 @@ To add a new CLI command, ensure that it is added in the list at the top of the The section should look like: - +```` (reference_pebble_{command name}_command)= ## {command name} @@ -244,7 +244,7 @@ The `{command name}` command is used to {describe the command}. :input: pebble {command name} --help ``` - +```` With `{command name}` replaced by the name of the command and `{describe the command}` replaced by a suitable description. From 523a21a0969519d62b60bcd076e2bdd7d16047cd Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 24 Jan 2025 13:23:30 +1300 Subject: [PATCH 25/47] Adjust return type. --- client/checks.go | 48 +++++++------ internals/cli/cmd_start-checks.go | 12 +++- internals/cli/cmd_stop-checks.go | 12 +++- internals/daemon/api_checks.go | 23 +++---- internals/daemon/api_services.go | 33 +++------ internals/overlord/checkstate/manager.go | 87 +++++++++++++++++++----- internals/overlord/overlord.go | 2 +- 7 files changed, 132 insertions(+), 85 deletions(-) diff --git a/client/checks.go b/client/checks.go index f074ff9bc..c42a930cd 100644 --- a/client/checks.go +++ b/client/checks.go @@ -39,6 +39,11 @@ type ChecksActionOptions struct { Names []string } +// CheckActionResults holds the results of a check action. +type CheckActionResults struct { + Changed []string `json:"changed"` +} + // CheckLevel represents the level of a health check. type CheckLevel string @@ -124,18 +129,16 @@ func (client *Client) Checks(opts *ChecksOptions) ([]*CheckInfo, error) { return checks, nil } -// Start starts the checks named in opts.Names. We ignore ops.Level for this -// action. -func (client *Client) StartChecks(opts *ChecksActionOptions) (response string, err error) { - response, err = client.doMultiCheckAction("start", opts.Names) - return response, err +// Start starts the checks named in opts.Names. +func (client *Client) StartChecks(opts *ChecksActionOptions) (results *CheckActionResults, err error) { + results, err = client.doMultiCheckAction("start", opts.Names) + return results, err } -// Stop stops the checks named in opts.Names. We ignore ops.Level for this -// action. -func (client *Client) StopChecks(opts *ChecksActionOptions) (response string, err error) { - response, err = client.doMultiCheckAction("stop", opts.Names) - return response, err +// Stop stops the checks named in opts.Names. +func (client *Client) StopChecks(opts *ChecksActionOptions) (results *CheckActionResults, err error) { + results, err = client.doMultiCheckAction("stop", opts.Names) + return results, err } type multiCheckActionData struct { @@ -143,34 +146,29 @@ type multiCheckActionData struct { Checks []string `json:"checks"` } -func (client *Client) doMultiCheckAction(actionName string, checks []string) (response string, err error) { +func (client *Client) doMultiCheckAction(actionName string, checks []string) (results *CheckActionResults, err error) { action := multiCheckActionData{ Action: actionName, Checks: checks, } data, err := json.Marshal(&action) if err != nil { - return "", fmt.Errorf("cannot marshal multi-check action: %w", err) - } - headers := map[string]string{ - "Content-Type": "application/json", + return nil, fmt.Errorf("cannot marshal multi-check action: %w", err) } resp, err := client.Requester().Do(context.Background(), &RequestOptions{ - Type: SyncRequest, - Method: "POST", - Path: "/v1/checks", - Headers: headers, - Body: bytes.NewBuffer(data), + Type: SyncRequest, + Method: "POST", + Path: "/v1/checks", + Body: bytes.NewBuffer(data), }) if err != nil { - return "", err + return nil, err } - var response string - err = resp.DecodeResult(&response) + err = resp.DecodeResult(&results) if err != nil { - return "", err + return nil, err } - return response, nil + return results, nil } diff --git a/internals/cli/cmd_start-checks.go b/internals/cli/cmd_start-checks.go index 1de2b768c..8672a26c0 100644 --- a/internals/cli/cmd_start-checks.go +++ b/internals/cli/cmd_start-checks.go @@ -16,6 +16,7 @@ package cli import ( "fmt" + "strings" "github.com/canonical/go-flags" @@ -56,11 +57,18 @@ func (cmd cmdStartChecks) Execute(args []string) error { checkopts := client.ChecksActionOptions{ Names: cmd.Positional.Checks, } - response, err := cmd.client.StartChecks(&checkopts) + results, err := cmd.client.StartChecks(&checkopts) if err != nil { return err } - fmt.Fprintln(Stdout, response) + var summary string + if len(results.Changed) == 0 { + summary = fmt.Sprintf("Checks already started.") + } else { + summary = fmt.Sprintf("Checks started: %s", strings.Join(results.Changed, ", ")) + } + + fmt.Fprintln(Stdout, summary) return nil } diff --git a/internals/cli/cmd_stop-checks.go b/internals/cli/cmd_stop-checks.go index 18e0091e6..6190db4b7 100644 --- a/internals/cli/cmd_stop-checks.go +++ b/internals/cli/cmd_stop-checks.go @@ -16,6 +16,7 @@ package cli import ( "fmt" + "strings" "github.com/canonical/go-flags" @@ -56,11 +57,18 @@ func (cmd cmdStopChecks) Execute(args []string) error { checkopts := client.ChecksActionOptions{ Names: cmd.Positional.Checks, } - response, err := cmd.client.StopChecks(&checkopts) + results, err := cmd.client.StopChecks(&checkopts) if err != nil { return err } - fmt.Fprintln(Stdout, response) + var summary string + if len(results.Changed) == 0 { + summary = fmt.Sprintf("Checks already stopped.") + } else { + summary = fmt.Sprintf("Checks stopped: %s", strings.Join(results.Changed, ", ")) + } + + fmt.Fprintln(Stdout, summary) return nil } diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 3b4305ac4..943966ae8 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -16,7 +16,6 @@ package daemon import ( "encoding/json" - "fmt" "net/http" "github.com/canonical/x-go/strutil" @@ -87,15 +86,14 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { } checkmgr := c.d.overlord.CheckManager() - plan := c.d.overlord.PlanManager().Plan() var err error - var checks []string + var changed []*plan.Check switch payload.Action { case "start": - checks, err = checkmgr.StartChecks(plan, payload.Checks) + changed, err = checkmgr.StartChecks(payload.Checks) case "stop": - checks, err = checkmgr.StopChecks(plan, payload.Checks) + changed, err = checkmgr.StopChecks(payload.Checks) default: return BadRequest("invalid action %q", payload.Action) } @@ -106,13 +104,12 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { st := c.d.overlord.State() st.EnsureBefore(0) // start and stop tasks right away - var result string - if len(checks) == 0 { - result = fmt.Sprintf("No checks needed to %s", payload.Action) - } else if len(checks) == 1 { - result = fmt.Sprintf("Queued %s for check %q", payload.Action, checks[0]) - } else { - result = fmt.Sprintf("Queued %s for check %q and %d more", payload.Action, checks[0], len(checks)-1) + var changedNames []string + for _, check := range changed { + changedNames = append(changedNames, check.Name) } - return SyncResponse(result) + type responsePayload struct { + Changed []string `json:"changed"` + } + return SyncResponse(responsePayload{Changed: changedNames}) } diff --git a/internals/daemon/api_services.go b/internals/daemon/api_services.go index 9ec7f83de..4e088711d 100644 --- a/internals/daemon/api_services.go +++ b/internals/daemon/api_services.go @@ -99,31 +99,6 @@ func v1PostServices(c *Command, r *http.Request, _ *UserState) Response { } } - // TODO (ping @benhoyt) This is clearly the wrong place for this - it's out - // of the switch below (to avoid deadlocking on the state lock) but even - // more so, it doesn't seem correct for the services API to be aware of the - // check manager's need to be notified. - // It seems like the right thing to do would be to trigger a PlanChanged - // (the doc says that it might happen without the plan changing, which is - // the case here - but alternatively there could be a ReplanRequested - // listener that works similarly). However, notifying the PlanChanged - // listeners is a planstate manager thing, and planstate seems to be more - // "build the layers into a plan" than actually taking action on the plan. - // Maybe planstate should gain a Replan method that does call the listeners - // but then this code would be calling that method and it's not clear what - // it would do *other* that calling the listeners. planstate's - // callChangeListeners could get a public interface, but it still seems like - // it's around the wrong way for the services API to be calling it. - // If you don't instinctively know the right way to do this, I'm happy to - // have a call to discuss. I missed this earlier (all my replan testing also - // had changes to the plan happening so services were started up again), so - // have only had a small amount of time to think it over/try things out. - if payload.Action == "replan" { - checkmgr := c.d.overlord.CheckManager() - plan := c.d.overlord.PlanManager().Plan() - defer checkmgr.PlanChanged(plan) - } - st := c.d.overlord.State() st.Lock() defer st.Unlock() @@ -207,6 +182,14 @@ func v1PostServices(c *Command, r *http.Request, _ *UserState) Response { sort.Strings(services) payload.Services = services + // We need to ensure that checks are also updated when a replan occurs, + // so we do that directly here. If this gets more complex in the future + // - for example other managers also need to be informed, then we might + // want to introduce a listener system for replan, but for now we keep + // it simple. + checkmgr := c.d.overlord.CheckManager() + checkmgr.Replan() + default: return BadRequest("invalid action %q", payload.Action) } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 16fe80c78..1ee3fed45 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -23,6 +23,8 @@ import ( "gopkg.in/tomb.v2" + "github.com/canonical/pebble/internals/logger" + "github.com/canonical/pebble/internals/overlord/planstate" "github.com/canonical/pebble/internals/overlord/state" "github.com/canonical/pebble/internals/plan" ) @@ -44,17 +46,18 @@ type CheckManager struct { checksLock sync.Mutex checks map[string]CheckInfo - plan *plan.Plan + planMgr *planstate.PlanManager } // FailureFunc is the type of function called when a failure action is triggered. type FailureFunc func(name string) // NewManager creates a new check manager. -func NewManager(s *state.State, runner *state.TaskRunner) *CheckManager { +func NewManager(s *state.State, runner *state.TaskRunner, planMgr *planstate.PlanManager) *CheckManager { manager := &CheckManager{ - state: s, - checks: make(map[string]CheckInfo), + state: s, + checks: make(map[string]CheckInfo), + planMgr: planMgr, } // Health check changes can be long-running; ensure they don't get pruned. @@ -384,24 +387,33 @@ type checker interface { check(ctx context.Context) error } -// StartChecks starts the specified checks, if not already running, and returns -// the checks that did need to be started. -func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]string, error) { +// StartChecks starts the checks with the specified names, if not already +// running, and returns the checks that did need to be started. +func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { m.state.Lock() defer m.state.Unlock() + currentPlan := m.planMgr.Plan() + // If any check specified is not in the plan, return an error. for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { return nil, fmt.Errorf("cannot find check %q in plan", name) } } - var started []string + + var started []*plan.Check for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. + m.checksLock.Lock() info, ok := m.checks[name] + m.checksLock.Unlock() if !ok { - panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) + // This will be rare: either a PlanChanged is running and this is + // between the info being removed and then being added back, or the + // check is new to the plan and PlanChanged hasn't finished yet. + logger.Noticef("check %s is in the plan but not known to the manager", name) + continue } // If the check is already running, skip it. if info.ChangeID != "" { @@ -409,30 +421,38 @@ func (m *CheckManager) StartChecks(currentPlan *plan.Plan, checks []string) ([]s } changeID := performCheckChange(m.state, check) m.updateCheckInfo(check, changeID, 0) - started = append(started, check.Name) + started = append(started, check) } return started, nil } -// StopChecks stops the specified checks, if currently running, and returns -// the checks that did need to be stopped. -func (m *CheckManager) StopChecks(currentPlan *plan.Plan, checks []string) ([]string, error) { +// StopChecks stops the checks with the specified names, if currently running, +// and returns the checks that did need to be stopped. +func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { m.state.Lock() defer m.state.Unlock() + currentPlan := m.planMgr.Plan() + // If any check specified is not in the plan, return an error. for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { return nil, fmt.Errorf("cannot find check %q in plan", name) } } - var stopped []string + var stopped []*plan.Check for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. + m.checksLock.Lock() info, ok := m.checks[name] + m.checksLock.Unlock() if !ok { - panic(fmt.Sprintf("check %s is in the plan but not known to the manager", name)) + // This will be rare: either a PlanChanged is running and this is + // between the info being removed and then being added back, or the + // check is new to the plan and PlanChanged hasn't finished yet. + logger.Noticef("check %s is in the plan but not known to the manager", name) + continue } // If the check is not running, skip it. if info.ChangeID == "" { @@ -440,9 +460,42 @@ func (m *CheckManager) StopChecks(currentPlan *plan.Plan, checks []string) ([]st } change := m.state.Change(info.ChangeID) change.Abort() - m.updateCheckInfo(currentPlan.Checks[check.Name], "", 0) - stopped = append(stopped, check.Name) + // We pass in the current number of failures so that it remains the + // same, so that people can inspect what the state of the check was when + // it was stopped. The status of the check will be "inactive", but the + // failure count combined with the threshold will give the full picture. + m.updateCheckInfo(check, "", info.Failures) + stopped = append(stopped, check) } return stopped, nil } + +// Replan handles starting "startup: enabled" checks when a replan occurs. +// The state lock must be held when calling this method. +// TODO: verify that replan does not stop services with startup:disabled +func (m *CheckManager) Replan() { + currentPlan := m.planMgr.Plan() + + for _, check := range currentPlan.Checks { + m.checksLock.Lock() + info, ok := m.checks[check.Name] + m.checksLock.Unlock() + if !ok { + // This will be rare: either a PlanChanged is running and this is + // between the info being removed and then being added back, or the + // check is new to the plan and PlanChanged hasn't finished yet. + logger.Noticef("check %s is in the plan but not known to the manager", check.Name) + continue + } + if check.Startup == plan.CheckStartupDisabled { + continue + } + // If the check is already running, skip it. + if info.ChangeID != "" { + continue + } + changeID := performCheckChange(m.state, check) + m.updateCheckInfo(check, changeID, 0) + } +} diff --git a/internals/overlord/overlord.go b/internals/overlord/overlord.go index 03d39dc61..7bfa5a55c 100644 --- a/internals/overlord/overlord.go +++ b/internals/overlord/overlord.go @@ -185,7 +185,7 @@ func New(opts *Options) (*Overlord, error) { o.commandMgr = cmdstate.NewManager(o.runner) o.stateEng.AddManager(o.commandMgr) - o.checkMgr = checkstate.NewManager(s, o.runner) + o.checkMgr = checkstate.NewManager(s, o.runner, o.planMgr) o.stateEng.AddManager(o.checkMgr) // Tell check manager about plan updates. From 90d1ccabe917b17a7d659d7475db76919f96c9cf Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Fri, 24 Jan 2025 13:25:27 +1300 Subject: [PATCH 26/47] Clarify doc. --- internals/overlord/checkstate/manager.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 1ee3fed45..763d87615 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -472,8 +472,9 @@ func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { } // Replan handles starting "startup: enabled" checks when a replan occurs. +// Checks that are "startup: disabled" but are already running do not get +// stopped in a replan. // The state lock must be held when calling this method. -// TODO: verify that replan does not stop services with startup:disabled func (m *CheckManager) Replan() { currentPlan := m.planMgr.Plan() From 2ec2dc2b5e503c689369695e94d149b08e4bbf45 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 3 Feb 2025 11:36:50 +1300 Subject: [PATCH 27/47] Verify that invalid startup values are rejected, and that the startup value is loaded. --- internals/plan/plan_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/internals/plan/plan_test.go b/internals/plan/plan_test.go index 0e607ece4..c09714c35 100644 --- a/internals/plan/plan_test.go +++ b/internals/plan/plan_test.go @@ -600,12 +600,14 @@ var planTests = []planTest{{ chk-tcp: override: merge level: ready + startup: enabled tcp: port: 7777 host: somehost chk-exec: override: replace + startup: disabled exec: command: sleep 1 environment: @@ -635,6 +637,7 @@ var planTests = []planTest{{ Name: "chk-tcp", Override: plan.MergeOverride, Level: plan.ReadyLevel, + Startup: plan.CheckStartupEnabled, Period: plan.OptionalDuration{Value: defaultCheckPeriod}, Timeout: plan.OptionalDuration{Value: defaultCheckTimeout}, Threshold: defaultCheckThreshold, @@ -647,6 +650,7 @@ var planTests = []planTest{{ Name: "chk-exec", Override: plan.ReplaceOverride, Level: plan.UnsetLevel, + Startup: plan.CheckStartupDisabled, Period: plan.OptionalDuration{Value: defaultCheckPeriod}, Timeout: plan.OptionalDuration{Value: defaultCheckTimeout}, Threshold: defaultCheckThreshold, @@ -941,6 +945,17 @@ var planTests = []planTest{{ service-context: nosvc `}, }, { + summary: `Invalid check startup value`, + error: `plan check "chk1" startup must be "enabled" or "disabled"`, + input: []string{` + checks: + chk1: + override: replace + startup: true + exec: + command: foo + `}, +}, {}, { summary: "Simple layer with log targets", input: []string{` services: From 3370039fe37918523d7355e802bf416ff9c6a73c Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 3 Feb 2025 11:37:28 +1300 Subject: [PATCH 28/47] Fix arguments for creating a check manager. --- internals/overlord/checkstate/manager_test.go | 2 +- internals/overlord/servstate/manager_test.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index e12b3cdc8..95cff02f3 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -59,7 +59,7 @@ func (s *ManagerSuite) SetUpTest(c *C) { c.Assert(err, IsNil) s.overlord = overlord.Fake() - s.manager = checkstate.NewManager(s.overlord.State(), s.overlord.TaskRunner()) + s.manager = checkstate.NewManager(s.overlord.State(), s.overlord.TaskRunner(), s.overlord.PlanManager()) s.overlord.AddManager(s.manager) s.overlord.AddManager(s.overlord.TaskRunner()) err = s.overlord.StartUp() diff --git a/internals/overlord/servstate/manager_test.go b/internals/overlord/servstate/manager_test.go index 6cb1feb7d..d5f38d90a 100644 --- a/internals/overlord/servstate/manager_test.go +++ b/internals/overlord/servstate/manager_test.go @@ -908,7 +908,7 @@ func (s *S) TestOnCheckFailureRestartWhileRunning(c *C) { s.planAddLayer(c, testPlanLayer) // Create check manager and tell it about plan updates - checkMgr := checkstate.NewManager(s.st, s.runner) + checkMgr := checkstate.NewManager(s.st, s.runner, nil) defer checkMgr.PlanChanged(&plan.Plan{}) // Tell service manager about check failures @@ -1003,7 +1003,7 @@ func (s *S) TestOnCheckFailureRestartDuringBackoff(c *C) { s.planAddLayer(c, testPlanLayer) // Create check manager and tell it about plan updates - checkMgr := checkstate.NewManager(s.st, s.runner) + checkMgr := checkstate.NewManager(s.st, s.runner, nil) defer checkMgr.PlanChanged(&plan.Plan{}) // Tell service manager about check failures @@ -1095,7 +1095,7 @@ func (s *S) TestOnCheckFailureIgnore(c *C) { s.planAddLayer(c, testPlanLayer) // Create check manager and tell it about plan updates - checkMgr := checkstate.NewManager(s.st, s.runner) + checkMgr := checkstate.NewManager(s.st, s.runner, nil) defer checkMgr.PlanChanged(&plan.Plan{}) // Tell service manager about check failures @@ -1180,7 +1180,7 @@ func (s *S) testOnCheckFailureShutdown(c *C, action string, restartType restart. s.planAddLayer(c, testPlanLayer) // Create check manager and tell it about plan updates - checkMgr := checkstate.NewManager(s.st, s.runner) + checkMgr := checkstate.NewManager(s.st, s.runner, nil) defer checkMgr.PlanChanged(&plan.Plan{}) // Tell service manager about check failures From d69db4c9256960d5161747db7ead569abefdf0bd Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 3 Feb 2025 12:45:26 +1300 Subject: [PATCH 29/47] More test adjustments. --- client/checks.go | 1 + client/checks_test.go | 50 ++++++++++++++++++++++++++ internals/cli/cmd_start-checks_test.go | 4 +-- internals/cli/cmd_stop-checks_test.go | 4 +-- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/client/checks.go b/client/checks.go index c42a930cd..0c0216d1b 100644 --- a/client/checks.go +++ b/client/checks.go @@ -165,6 +165,7 @@ func (client *Client) doMultiCheckAction(actionName string, checks []string) (re if err != nil { return nil, err } + err = resp.DecodeResult(&results) if err != nil { return nil, err diff --git a/client/checks_test.go b/client/checks_test.go index f2758b4c5..eef9c691e 100644 --- a/client/checks_test.go +++ b/client/checks_test.go @@ -15,6 +15,7 @@ package client_test import ( + "encoding/json" "net/url" "gopkg.in/check.v1" @@ -59,3 +60,52 @@ func (cs *clientSuite) TestChecksGet(c *check.C) { "names": {"chk1", "chk3", "chk5"}, }) } + +func (cs *clientSuite) TestStartChecks(c *check.C) { + cs.rsp = `{ + "result": {"changed": ["chk1", "chk2"]}, + "status": "OK", + "status-code": 200, + "type": "sync" + }` + + opts := client.ChecksActionOptions{ + Names: []string{"chk1", "chk2"}, + } + results, err := cs.cli.StartChecks(&opts) + c.Check(err, check.IsNil) + c.Check(results.Changed, check.DeepEquals, []string{"chk1", "chk2"}) + c.Assert(cs.req.Method, check.Equals, "POST") + c.Assert(cs.req.URL.Path, check.Equals, "/v1/checks") + + var body map[string]interface{} + c.Assert(json.NewDecoder(cs.req.Body).Decode(&body), check.IsNil) + c.Check(body, check.HasLen, 2) + c.Check(body["action"], check.Equals, "start") + c.Check(body["checks"], check.DeepEquals, []interface{}{"chk1", "chk2"}) + +} + +func (cs *clientSuite) TestStopChecks(c *check.C) { + cs.rsp = `{ + "result": {"changed": ["chk1"]}, + "status": "OK", + "status-code": 200, + "type": "sync" + }` + + opts := client.ChecksActionOptions{ + Names: []string{"chk1", "chk2"}, + } + results, err := cs.cli.StopChecks(&opts) + c.Check(err, check.IsNil) + c.Check(results.Changed, check.DeepEquals, []string{"chk1"}) + c.Assert(cs.req.Method, check.Equals, "POST") + c.Assert(cs.req.URL.Path, check.Equals, "/v1/checks") + + var body map[string]interface{} + c.Assert(json.NewDecoder(cs.req.Body).Decode(&body), check.IsNil) + c.Check(body, check.HasLen, 2) + c.Check(body["action"], check.Equals, "stop") + c.Check(body["checks"], check.DeepEquals, []interface{}{"chk1", "chk2"}) +} diff --git a/internals/cli/cmd_start-checks_test.go b/internals/cli/cmd_start-checks_test.go index 63502ca8e..802b85b74 100644 --- a/internals/cli/cmd_start-checks_test.go +++ b/internals/cli/cmd_start-checks_test.go @@ -37,14 +37,14 @@ func (s *PebbleSuite) TestStartChecks(c *check.C) { fmt.Fprintf(w, `{ "type": "sync", "status-code": 200, - "result": "Queued \"start\" for check chk1 and 1 more" + "result": {"changed": ["chk1", "chk2"]} }`) }) rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2"}) c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) - c.Check(s.Stdout(), check.Equals, "Queued \"start\" for check chk1 and 1 more\n") + c.Check(s.Stdout(), check.Equals, "Checks started: chk1, chk2\n") c.Check(s.Stderr(), check.Equals, "") } diff --git a/internals/cli/cmd_stop-checks_test.go b/internals/cli/cmd_stop-checks_test.go index 1d3e360ec..3dcb0c326 100644 --- a/internals/cli/cmd_stop-checks_test.go +++ b/internals/cli/cmd_stop-checks_test.go @@ -37,14 +37,14 @@ func (s *PebbleSuite) TestStopChecks(c *check.C) { fmt.Fprintf(w, `{ "type": "sync", "status-code": 200, - "result": "Queued \"stop\" for check chk1 and 1 more" + "result": {"changed": ["chk1", "chk2"]} }`) }) rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2"}) c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) - c.Check(s.Stdout(), check.Equals, "Queued \"stop\" for check chk1 and 1 more\n") + c.Check(s.Stdout(), check.Equals, "Checks stopped: chk1, chk2\n") c.Check(s.Stderr(), check.Equals, "") } From 1e3feacaf5589f4039e5698733b33abd84c77f62 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 3 Feb 2025 12:46:38 +1300 Subject: [PATCH 30/47] Fix the response type. --- docs/specs/openapi.yaml | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/specs/openapi.yaml b/docs/specs/openapi.yaml index 66c08bb5f..23690b150 100644 --- a/docs/specs/openapi.yaml +++ b/docs/specs/openapi.yaml @@ -336,19 +336,17 @@ paths: example: {"action": "start", "checks": ["svc1"]} responses: - "202": - description: Accepted - asynchronous operation started. + "200": + description: Check operations completed. content: application/json: schema: - $ref: "#/components/schemas/PostServicesResponse" + $ref: "#/components/schemas/CheckActionResponse" example: { - "type": "async", - "status-code": 202, - "status": "Accepted", - "change": "25", - "result": null + "type": "sync", + "status-code": 200, + "result": {"changed": ["chk1", "chk2"]} } /v1/exec: post: @@ -1175,6 +1173,19 @@ components: change: type: string description: The Change ID of the asynchronous change. + CheckActionResponse: + allOf: + - $ref: "#/components/schemas/BaseResponse" + - type: object + properties: + result: + type: object + properties: + changed: + type: array + items: + type: string + description: List of checks that were changed. GetChangesResponse: allOf: - $ref: "#/components/schemas/BaseResponse" From 0a0bd058e54f1ddb9f937d6e30a6df7a9e5db174 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 09:49:13 +1300 Subject: [PATCH 31/47] Minor tweaks from review. --- client/checks.go | 21 ++++++++++----------- client/checks_test.go | 4 ++-- docs/reference/cli-commands.md | 6 +++--- internals/cli/cmd_replan.go | 8 ++++---- internals/cli/cmd_start-checks_test.go | 4 ++-- internals/cli/cmd_stop-checks_test.go | 4 ++-- internals/daemon/api_checks.go | 2 +- internals/overlord/checkstate/manager.go | 24 +++++++++++++----------- 8 files changed, 37 insertions(+), 36 deletions(-) diff --git a/client/checks.go b/client/checks.go index 0c0216d1b..e61be5156 100644 --- a/client/checks.go +++ b/client/checks.go @@ -39,8 +39,8 @@ type ChecksActionOptions struct { Names []string } -// CheckActionResults holds the results of a check action. -type CheckActionResults struct { +// ChecksActionResult holds the results of a check action. +type ChecksActionResult struct { Changed []string `json:"changed"` } @@ -129,16 +129,14 @@ func (client *Client) Checks(opts *ChecksOptions) ([]*CheckInfo, error) { return checks, nil } -// Start starts the checks named in opts.Names. -func (client *Client) StartChecks(opts *ChecksActionOptions) (results *CheckActionResults, err error) { - results, err = client.doMultiCheckAction("start", opts.Names) - return results, err +// StartChecks starts the checks named in opts.Names. +func (client *Client) StartChecks(opts *ChecksActionOptions) (*ChecksActionResult, error) { + return client.doMultiCheckAction("start", opts.Names) } -// Stop stops the checks named in opts.Names. -func (client *Client) StopChecks(opts *ChecksActionOptions) (results *CheckActionResults, err error) { - results, err = client.doMultiCheckAction("stop", opts.Names) - return results, err +// StopChecks stops the checks named in opts.Names. +func (client *Client) StopChecks(opts *ChecksActionOptions) (*ChecksActionResult, error) { + return client.doMultiCheckAction("stop", opts.Names) } type multiCheckActionData struct { @@ -146,7 +144,7 @@ type multiCheckActionData struct { Checks []string `json:"checks"` } -func (client *Client) doMultiCheckAction(actionName string, checks []string) (results *CheckActionResults, err error) { +func (client *Client) doMultiCheckAction(actionName string, checks []string) (*ChecksActionResult, error) { action := multiCheckActionData{ Action: actionName, Checks: checks, @@ -166,6 +164,7 @@ func (client *Client) doMultiCheckAction(actionName string, checks []string) (re return nil, err } + var results *ChecksActionResult err = resp.DecodeResult(&results) if err != nil { return nil, err diff --git a/client/checks_test.go b/client/checks_test.go index eef9c691e..b3a67621f 100644 --- a/client/checks_test.go +++ b/client/checks_test.go @@ -63,7 +63,7 @@ func (cs *clientSuite) TestChecksGet(c *check.C) { func (cs *clientSuite) TestStartChecks(c *check.C) { cs.rsp = `{ - "result": {"changed": ["chk1", "chk2"]}, + "result": {"changed": ["chk1", "chk2"]}, "status": "OK", "status-code": 200, "type": "sync" @@ -88,7 +88,7 @@ func (cs *clientSuite) TestStartChecks(c *check.C) { func (cs *clientSuite) TestStopChecks(c *check.C) { cs.rsp = `{ - "result": {"changed": ["chk1"]}, + "result": {"changed": ["chk1"]}, "status": "OK", "status-code": 200, "type": "sync" diff --git a/docs/reference/cli-commands.md b/docs/reference/cli-commands.md index 7d68f06b0..b00dd2151 100644 --- a/docs/reference/cli-commands.md +++ b/docs/reference/cli-commands.md @@ -706,9 +706,9 @@ The `replan` command starts, stops, or restarts services that have changed, so t Usage: pebble replan [replan-OPTIONS] -The replan command starts, stops, or restarts services that have changed, -so that running services exactly match the desired configuration in the -current plan. +The replan command starts, stops, or restarts services and checks that have +changed, so that running services and checks exactly match the desired +configuration in the current plan. [replan command options] --no-wait Do not wait for the operation to finish but just print the diff --git a/internals/cli/cmd_replan.go b/internals/cli/cmd_replan.go index cd24b6153..52169295e 100644 --- a/internals/cli/cmd_replan.go +++ b/internals/cli/cmd_replan.go @@ -20,11 +20,11 @@ import ( "github.com/canonical/pebble/client" ) -const cmdReplanSummary = "Ensure running services match the current plan" +const cmdReplanSummary = "Ensure running services and checks match the current plan" const cmdReplanDescription = ` -The replan command starts, stops, or restarts services that have changed, -so that running services exactly match the desired configuration in the -current plan. +The replan command starts, stops, or restarts services and checks that have +changed, so that running services and checks exactly match the desired +configuration in the current plan. ` type cmdReplan struct { diff --git a/internals/cli/cmd_start-checks_test.go b/internals/cli/cmd_start-checks_test.go index 802b85b74..dce7915e7 100644 --- a/internals/cli/cmd_start-checks_test.go +++ b/internals/cli/cmd_start-checks_test.go @@ -31,7 +31,7 @@ func (s *PebbleSuite) TestStartChecks(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ "action": "start", - "checks": []interface{}{"chk1", "chk2"}, + "checks": []interface{}{"chk1", "chk2", "chk3"}, }) fmt.Fprintf(w, `{ @@ -41,7 +41,7 @@ func (s *PebbleSuite) TestStartChecks(c *check.C) { }`) }) - rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2"}) + rest, err := cli.ParserForTest().ParseArgs([]string{"start-checks", "chk1", "chk2", "chk3"}) c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) c.Check(s.Stdout(), check.Equals, "Checks started: chk1, chk2\n") diff --git a/internals/cli/cmd_stop-checks_test.go b/internals/cli/cmd_stop-checks_test.go index 3dcb0c326..76566a08e 100644 --- a/internals/cli/cmd_stop-checks_test.go +++ b/internals/cli/cmd_stop-checks_test.go @@ -31,7 +31,7 @@ func (s *PebbleSuite) TestStopChecks(c *check.C) { body := DecodedRequestBody(c, r) c.Check(body, check.DeepEquals, map[string]interface{}{ "action": "stop", - "checks": []interface{}{"chk1", "chk2"}, + "checks": []interface{}{"chk1", "chk2", "chk3"}, }) fmt.Fprintf(w, `{ @@ -41,7 +41,7 @@ func (s *PebbleSuite) TestStopChecks(c *check.C) { }`) }) - rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2"}) + rest, err := cli.ParserForTest().ParseArgs([]string{"stop-checks", "chk1", "chk2", "chk3"}) c.Assert(err, check.IsNil) c.Assert(rest, check.HasLen, 0) c.Check(s.Stdout(), check.Equals, "Checks stopped: chk1, chk2\n") diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 943966ae8..c3ed402d1 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -26,7 +26,7 @@ import ( type checkInfo struct { Name string `json:"name"` Level string `json:"level,omitempty"` - Startup string `json:"startup,omitempty"` + Startup string `json:"startup"` Status string `json:"status"` Failures int `json:"failures,omitempty"` Threshold int `json:"threshold"` diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 763d87615..e17229ddd 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -39,14 +39,13 @@ const ( // CheckManager starts and manages the health checks. type CheckManager struct { - state *state.State + state *state.State + planMgr *planstate.PlanManager failureHandlers []FailureFunc checksLock sync.Mutex checks map[string]CheckInfo - - planMgr *planstate.PlanManager } // FailureFunc is the type of function called when a failure action is triggered. @@ -390,9 +389,6 @@ type checker interface { // StartChecks starts the checks with the specified names, if not already // running, and returns the checks that did need to be started. func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { - m.state.Lock() - defer m.state.Unlock() - currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. @@ -402,6 +398,9 @@ func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { } } + m.state.Lock() + defer m.state.Unlock() + var started []*plan.Check for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. @@ -430,9 +429,6 @@ func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { // StopChecks stops the checks with the specified names, if currently running, // and returns the checks that did need to be stopped. func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { - m.state.Lock() - defer m.state.Unlock() - currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. @@ -441,6 +437,10 @@ func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { return nil, fmt.Errorf("cannot find check %q in plan", name) } } + + m.state.Lock() + defer m.state.Unlock() + var stopped []*plan.Check for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. @@ -459,13 +459,15 @@ func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { continue } change := m.state.Change(info.ChangeID) - change.Abort() + if change != nil { + change.Abort() + stopped = append(stopped, check) + } // We pass in the current number of failures so that it remains the // same, so that people can inspect what the state of the check was when // it was stopped. The status of the check will be "inactive", but the // failure count combined with the threshold will give the full picture. m.updateCheckInfo(check, "", info.Failures) - stopped = append(stopped, check) } return stopped, nil From 38983fe282d19d35dbec11af0739d0d7a27b38ae Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 09:54:26 +1300 Subject: [PATCH 32/47] Adjust return type, per review. --- internals/daemon/api_checks.go | 8 ++------ internals/overlord/checkstate/manager.go | 12 ++++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index c3ed402d1..3b43b7e94 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -88,7 +88,7 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { checkmgr := c.d.overlord.CheckManager() var err error - var changed []*plan.Check + var changed []string switch payload.Action { case "start": changed, err = checkmgr.StartChecks(payload.Checks) @@ -104,12 +104,8 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { st := c.d.overlord.State() st.EnsureBefore(0) // start and stop tasks right away - var changedNames []string - for _, check := range changed { - changedNames = append(changedNames, check.Name) - } type responsePayload struct { Changed []string `json:"changed"` } - return SyncResponse(responsePayload{Changed: changedNames}) + return SyncResponse(responsePayload{Changed: changed}) } diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index e17229ddd..81209cd4a 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -388,7 +388,7 @@ type checker interface { // StartChecks starts the checks with the specified names, if not already // running, and returns the checks that did need to be started. -func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { +func (m *CheckManager) StartChecks(checks []string) ([]string, error) { currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. @@ -401,7 +401,7 @@ func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { m.state.Lock() defer m.state.Unlock() - var started []*plan.Check + var started []string for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. m.checksLock.Lock() @@ -420,7 +420,7 @@ func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { } changeID := performCheckChange(m.state, check) m.updateCheckInfo(check, changeID, 0) - started = append(started, check) + started = append(started, check.Name) } return started, nil @@ -428,7 +428,7 @@ func (m *CheckManager) StartChecks(checks []string) ([]*plan.Check, error) { // StopChecks stops the checks with the specified names, if currently running, // and returns the checks that did need to be stopped. -func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { +func (m *CheckManager) StopChecks(checks []string) ([]string, error) { currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. @@ -441,7 +441,7 @@ func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { m.state.Lock() defer m.state.Unlock() - var stopped []*plan.Check + var stopped []string for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. m.checksLock.Lock() @@ -461,7 +461,7 @@ func (m *CheckManager) StopChecks(checks []string) ([]*plan.Check, error) { change := m.state.Change(info.ChangeID) if change != nil { change.Abort() - stopped = append(stopped, check) + stopped = append(stopped, check.Name) } // We pass in the current number of failures so that it remains the // same, so that people can inspect what the state of the check was when From 11973b904d94045a86042266ee052b280962f834 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 09:55:59 +1300 Subject: [PATCH 33/47] Name the return arguments since one is a []string now. --- internals/overlord/checkstate/manager.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 81209cd4a..12e867404 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -388,7 +388,7 @@ type checker interface { // StartChecks starts the checks with the specified names, if not already // running, and returns the checks that did need to be started. -func (m *CheckManager) StartChecks(checks []string) ([]string, error) { +func (m *CheckManager) StartChecks(checks []string) (started []string, err error) { currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. @@ -401,7 +401,6 @@ func (m *CheckManager) StartChecks(checks []string) ([]string, error) { m.state.Lock() defer m.state.Unlock() - var started []string for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. m.checksLock.Lock() @@ -428,7 +427,7 @@ func (m *CheckManager) StartChecks(checks []string) ([]string, error) { // StopChecks stops the checks with the specified names, if currently running, // and returns the checks that did need to be stopped. -func (m *CheckManager) StopChecks(checks []string) ([]string, error) { +func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) { currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. @@ -441,7 +440,6 @@ func (m *CheckManager) StopChecks(checks []string) ([]string, error) { m.state.Lock() defer m.state.Unlock() - var stopped []string for _, name := range checks { check := currentPlan.Checks[name] // We know this is ok because we checked it above. m.checksLock.Lock() From 3a037d630ccd52d7a276b01cfccba6cbd41fd220 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 10:06:53 +1300 Subject: [PATCH 34/47] Future-proof the error response, per review. --- internals/daemon/api_checks.go | 7 ++++++- internals/overlord/checkstate/manager.go | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 3b43b7e94..555de195d 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -20,6 +20,7 @@ import ( "github.com/canonical/x-go/strutil" + "github.com/canonical/pebble/internals/overlord/checkstate" "github.com/canonical/pebble/internals/plan" ) @@ -98,7 +99,11 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { return BadRequest("invalid action %q", payload.Action) } if err != nil { - return BadRequest("cannot %s checks: %v", payload.Action, err) + if _, ok := err.(*checkstate.UnknownCheck); ok { + return BadRequest("cannot %s checks: %v", payload.Action, err) + } else { + return InternalError("cannot %s checks: %v", payload.Action, err) + } } st := c.d.overlord.State() diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 12e867404..ee6bc3053 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -386,6 +386,16 @@ type checker interface { check(ctx context.Context) error } +// UnknownCheck is the error returned by StartChecks or StopChecks when a check +// with the specified name is not found in the plan. +type UnknownCheck struct { + Name string +} + +func (e *UnknownCheck) Error() string { + return fmt.Sprintf("cannot find check %q in plan", e.Name) +} + // StartChecks starts the checks with the specified names, if not already // running, and returns the checks that did need to be started. func (m *CheckManager) StartChecks(checks []string) (started []string, err error) { @@ -394,7 +404,7 @@ func (m *CheckManager) StartChecks(checks []string) (started []string, err error // If any check specified is not in the plan, return an error. for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { - return nil, fmt.Errorf("cannot find check %q in plan", name) + return nil, &UnknownCheck{Name: name} } } @@ -433,7 +443,8 @@ func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) // If any check specified is not in the plan, return an error. for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { - return nil, fmt.Errorf("cannot find check %q in plan", name) + return nil, &UnknownCheck{Name: name} + return nil, &UnknownCheck{Name: name} } } From 16f16db6cd949e8bdc1b3e7ed3b31480b876ee1e Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 10:08:44 +1300 Subject: [PATCH 35/47] Rename error type. --- internals/daemon/api_checks.go | 2 +- internals/overlord/checkstate/manager.go | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index 555de195d..e3ad342d0 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -99,7 +99,7 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { return BadRequest("invalid action %q", payload.Action) } if err != nil { - if _, ok := err.(*checkstate.UnknownCheck); ok { + if _, ok := err.(*checkstate.CheckNotFound); ok { return BadRequest("cannot %s checks: %v", payload.Action, err) } else { return InternalError("cannot %s checks: %v", payload.Action, err) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index ee6bc3053..cbcfb9651 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -386,13 +386,13 @@ type checker interface { check(ctx context.Context) error } -// UnknownCheck is the error returned by StartChecks or StopChecks when a check +// CheckNotFound is the error returned by StartChecks or StopChecks when a check // with the specified name is not found in the plan. -type UnknownCheck struct { +type CheckNotFound struct { Name string } -func (e *UnknownCheck) Error() string { +func (e *CheckNotFound) Error() string { return fmt.Sprintf("cannot find check %q in plan", e.Name) } @@ -404,7 +404,7 @@ func (m *CheckManager) StartChecks(checks []string) (started []string, err error // If any check specified is not in the plan, return an error. for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { - return nil, &UnknownCheck{Name: name} + return nil, &CheckNotFound{Name: name} } } @@ -443,8 +443,8 @@ func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) // If any check specified is not in the plan, return an error. for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { - return nil, &UnknownCheck{Name: name} - return nil, &UnknownCheck{Name: name} + return nil, &CheckNotFound{Name: name} + return nil, &CheckNotFound{Name: name} } } From 0dedf3fcb1890901e208242b88146a93b166c0bd Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 15:41:51 +1300 Subject: [PATCH 36/47] Return all the missing checks, not just the first one. --- internals/daemon/api_checks.go | 2 +- internals/overlord/checkstate/manager.go | 25 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/internals/daemon/api_checks.go b/internals/daemon/api_checks.go index e3ad342d0..26f360502 100644 --- a/internals/daemon/api_checks.go +++ b/internals/daemon/api_checks.go @@ -99,7 +99,7 @@ func v1PostChecks(c *Command, r *http.Request, _ *UserState) Response { return BadRequest("invalid action %q", payload.Action) } if err != nil { - if _, ok := err.(*checkstate.CheckNotFound); ok { + if _, ok := err.(*checkstate.ChecksNotFound); ok { return BadRequest("cannot %s checks: %v", payload.Action, err) } else { return InternalError("cannot %s checks: %v", payload.Action, err) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index cbcfb9651..45b07fe5d 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -19,6 +19,7 @@ import ( "fmt" "reflect" "sort" + "strings" "sync" "gopkg.in/tomb.v2" @@ -386,14 +387,15 @@ type checker interface { check(ctx context.Context) error } -// CheckNotFound is the error returned by StartChecks or StopChecks when a check +// ChecksNotFound is the error returned by StartChecks or StopChecks when a check // with the specified name is not found in the plan. -type CheckNotFound struct { - Name string +type ChecksNotFound struct { + Names []string } -func (e *CheckNotFound) Error() string { - return fmt.Sprintf("cannot find check %q in plan", e.Name) +func (e *ChecksNotFound) Error() string { + sort.Strings(e.Names) + return fmt.Sprintf("cannot find checks in plan: %s", strings.Join(e.Names, ", ")) } // StartChecks starts the checks with the specified names, if not already @@ -402,11 +404,15 @@ func (m *CheckManager) StartChecks(checks []string) (started []string, err error currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. + var missing []string for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { - return nil, &CheckNotFound{Name: name} + missing = append(missing, name) } } + if len(missing) > 0 { + return nil, &ChecksNotFound{Names: missing} + } m.state.Lock() defer m.state.Unlock() @@ -441,12 +447,15 @@ func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) currentPlan := m.planMgr.Plan() // If any check specified is not in the plan, return an error. + var missing []string for _, name := range checks { if _, ok := currentPlan.Checks[name]; !ok { - return nil, &CheckNotFound{Name: name} - return nil, &CheckNotFound{Name: name} + missing = append(missing, name) } } + if len(missing) > 0 { + return nil, &ChecksNotFound{Names: missing} + } m.state.Lock() defer m.state.Unlock() From b3b0ffdc64bbfe8a3d447f8eb997e0e38d379e75 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 15:52:39 +1300 Subject: [PATCH 37/47] Use any instead of interface{} to align with a PR that may be before or just after this one. --- client/checks_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/checks_test.go b/client/checks_test.go index b3a67621f..394df3bb2 100644 --- a/client/checks_test.go +++ b/client/checks_test.go @@ -82,7 +82,7 @@ func (cs *clientSuite) TestStartChecks(c *check.C) { c.Assert(json.NewDecoder(cs.req.Body).Decode(&body), check.IsNil) c.Check(body, check.HasLen, 2) c.Check(body["action"], check.Equals, "start") - c.Check(body["checks"], check.DeepEquals, []interface{}{"chk1", "chk2"}) + c.Check(body["checks"], check.DeepEquals, []any{"chk1", "chk2"}) } @@ -107,5 +107,5 @@ func (cs *clientSuite) TestStopChecks(c *check.C) { c.Assert(json.NewDecoder(cs.req.Body).Decode(&body), check.IsNil) c.Check(body, check.HasLen, 2) c.Check(body["action"], check.Equals, "stop") - c.Check(body["checks"], check.DeepEquals, []interface{}{"chk1", "chk2"}) + c.Check(body["checks"], check.DeepEquals, []any{"chk1", "chk2"}) } From f782b61f1c2f7667f50fb97e893fa9474c41c342 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 17:30:24 +1300 Subject: [PATCH 38/47] WiP --- internals/overlord/checkstate/manager_test.go | 169 ++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 95cff02f3..c1681427e 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -584,3 +584,172 @@ func changeData(c *C, st *state.State, changeID string) map[string]string { c.Assert(err, IsNil) return data } + +func (s *ManagerSuite) TestStartChecks(c *C) { + origLayer := &plan.Layer{ + Checks: map[string]*plan.Check{ + "chk1": { + Name: "chk1", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk1"}, + }, + "chk2": { + Name: "chk2", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk2"}, + Startup: plan.CheckStartupDisabled, + }, + "chk3": { + Name: "chk3", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk3"}, + Startup: plan.CheckStartupEnabled, + }, + }, + } + s.manager.planMgr.AppendLayer(origLayer) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, + }) + checks, err := s.manager.Checks() + c.Assert(err, IsNil) + var originalChangeIDs []string + for _, check := range checks { + originalChangeIDs = append(originalChangeIDs, check.ChangeID) + } + + changed, err := s.manager.StartChecks([]string{"chk1", "chk2"}) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "disabled", Status: "up", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, + }) + c.Assert(err, IsNil) + c.Assert(changed, DeepEquals, []string{"chk2"}) + checks, err = s.manager.Checks() + c.Assert(err, IsNil) + // chk1 and chk3 should still have the same change ID, chk2 should have a new one. + c.Assert(checks[0].ChangeID, Equals, originalChangeIDs[0]) + c.Assert(checks[1].ChangeID, Not(Equals), originalChangeIDs[1]) + c.Assert(checks[2].ChangeID, Equals, originalChangeIDs[2]) + // chk2's new Change should be a running perform-check. + st := s.overlord.State() + st.Lock() + change := st.Change(originalChangeIDs[1]) + status := change.Status() + st.Unlock() + c.Assert(status, Equals, state.DoingStatus) + c.Assert(change.Kind(), Equals, "perform-check") +} + +func (s *ManagerSuite) TestStartChecksNotFound(c *C) { + origLayer := &plan.Layer{ + Checks: map[string]*plan.Check{ + "chk1": { + Name: "chk1", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk1"}, + }, + }, + } + s.manager.planMgr.AppendLayer(origLayer) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + }) + + changed, err := s.manager.StartChecks([]string{"chk1", "chk2"}) + _, ok := err.(*checkstate.ChecksNotFound) + c.Assert(ok, Equals, true) + c.Assert(err.(*checkstate.ChecksNotFound).Names, DeepEquals, []string{"chk2"}) + c.Assert(changed, IsNil) +} + +func (s *ManagerSuite) TestStopChecks(c *C) { + origLayer := &plan.Layer{ + Checks: map[string]*plan.Check{ + "chk1": { + Name: "chk1", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk1"}, + }, + "chk2": { + Name: "chk2", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk2"}, + Startup: plan.CheckStartupDisabled, + }, + "chk3": { + Name: "chk3", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk3"}, + Startup: plan.CheckStartupEnabled, + }, + }, + } + s.manager.planMgr.AppendLayer(origLayer) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, + }) + checks, err := s.manager.Checks() + c.Assert(err, IsNil) + var originalChangeIDs []string + for _, check := range checks { + originalChangeIDs = append(originalChangeIDs, check.ChangeID) + } + + changed, err := s.manager.StopChecks([]string{"chk1", "chk2"}) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "inactive", Threshold: 3}, + {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, + }) + c.Assert(err, IsNil) + c.Assert(changed, DeepEquals, []string{"chk1"}) + checks, err = s.manager.Checks() + c.Assert(err, IsNil) + // chk1 and chk3 should still have the same change ID, chk2 should not have one. + c.Assert(checks[0].ChangeID, Equals, originalChangeIDs[0]) + c.Assert(checks[1].ChangeID, Equals, "") + c.Assert(checks[2].ChangeID, Equals, originalChangeIDs[2]) + // chk2's old Change should have aborted. + st := s.overlord.State() + st.Lock() + change := st.Change(originalChangeIDs[1]) + status := change.Status() + st.Unlock() + c.Assert(status, Equals, state.AbortStatus) +} + +func (s *ManagerSuite) TestStopChecksNotFound(c *C) { + origLayer := &plan.Layer{ + Checks: map[string]*plan.Check{ + "chk1": { + Name: "chk1", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk1"}, + }, + }, + } + s.manager.planMgr.AppendLayer(origLayer) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + }) + + changed, err := s.manager.StopChecks([]string{"chk1", "chk2"}) + _, ok := err.(*checkstate.ChecksNotFound) + c.Assert(ok, Equals, true) + c.Assert(err.(*checkstate.ChecksNotFound).Names, DeepEquals, []string{"chk2"}) + c.Assert(changed, IsNil) +} From 435e56e79756c5e04962fd313026c178c6cf0423 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 18:20:37 +1300 Subject: [PATCH 39/47] Add (broken) replan test. --- internals/overlord/checkstate/manager_test.go | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index c1681427e..8a730e7e7 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -753,3 +753,70 @@ func (s *ManagerSuite) TestStopChecksNotFound(c *C) { c.Assert(err.(*checkstate.ChecksNotFound).Names, DeepEquals, []string{"chk2"}) c.Assert(changed, IsNil) } + +func (s *ManagerSuite) TestReplan(c *C) { + origLayer := &plan.Layer{ + Checks: map[string]*plan.Check{ + "chk1": { + Name: "chk1", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk1"}, + }, + "chk2": { + Name: "chk2", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk2"}, + Startup: plan.CheckStartupDisabled, + }, + "chk3": { + Name: "chk3", + Period: plan.OptionalDuration{Value: time.Second}, + Threshold: 3, + Exec: &plan.ExecCheck{Command: "echo chk3"}, + Startup: plan.CheckStartupEnabled, + }, + }, + } + s.manager.planMgr.AppendLayer(origLayer) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, + }) + s.manager.StopChecks([]string{"chk1"}) + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "inactive", Threshold: 3}, + {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, + }) + checks, err := s.manager.Checks() + var originalChangeIDs []string + for _, check := range checks { + originalChangeIDs = append(originalChangeIDs, check.ChangeID) + } + + s.manager.Replan() + waitChecks(c, s.manager, []*checkstate.CheckInfo{ + {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, + {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, + {Name: "chk3", Startup: "enabled", Status: "up", Threshold: 3}, + }) + c.Assert(err, IsNil) + checks, err = s.manager.Checks() + c.Assert(err, IsNil) + // chk3 should still have the same change ID, chk1 should have a new one, + // and chk2 should not have one. + c.Assert(checks[0].ChangeID, Not(Equals), originalChangeIDs[0]) + c.Assert(checks[1].ChangeID, Equals, "") + c.Assert(checks[2].ChangeID, Equals, originalChangeIDs[2]) + // chk1's new Change should be a running perform-check. + st := s.overlord.State() + st.Lock() + change := st.Change(originalChangeIDs[0]) + status := change.Status() + st.Unlock() + c.Assert(status, Equals, state.DoingStatus) + c.Assert(change.Kind(), Equals, "perform-check") +} From 15f61f53052ec835ac1a14ec63a9f69da1f77184 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 18:32:51 +1300 Subject: [PATCH 40/47] More WiP (currently hangs). --- internals/overlord/checkstate/manager_test.go | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 8a730e7e7..0b6fea725 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -29,6 +29,7 @@ import ( "github.com/canonical/pebble/internals/logger" "github.com/canonical/pebble/internals/overlord" "github.com/canonical/pebble/internals/overlord/checkstate" + "github.com/canonical/pebble/internals/overlord/planstate" "github.com/canonical/pebble/internals/overlord/state" "github.com/canonical/pebble/internals/plan" "github.com/canonical/pebble/internals/reaper" @@ -41,6 +42,7 @@ func Test(t *testing.T) { type ManagerSuite struct { overlord *overlord.Overlord manager *checkstate.CheckManager + planMgr *planstate.PlanManager } var _ = Suite(&ManagerSuite{}) @@ -59,7 +61,13 @@ func (s *ManagerSuite) SetUpTest(c *C) { c.Assert(err, IsNil) s.overlord = overlord.Fake() - s.manager = checkstate.NewManager(s.overlord.State(), s.overlord.TaskRunner(), s.overlord.PlanManager()) + layersDir := filepath.Join(c.MkDir(), "layers") + err = os.Mkdir(layersDir, 0755) + c.Assert(err, IsNil) + s.planMgr, err = planstate.NewManager(layersDir) + s.overlord.AddManager(s.planMgr) + c.Assert(err, IsNil) + s.manager = checkstate.NewManager(s.overlord.State(), s.overlord.TaskRunner(), s.planMgr) s.overlord.AddManager(s.manager) s.overlord.AddManager(s.overlord.TaskRunner()) err = s.overlord.StartUp() @@ -610,7 +618,8 @@ func (s *ManagerSuite) TestStartChecks(c *C) { }, }, } - s.manager.planMgr.AppendLayer(origLayer) + s.planMgr.Load() + s.planMgr.AppendLayer(origLayer, false) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, @@ -658,7 +667,8 @@ func (s *ManagerSuite) TestStartChecksNotFound(c *C) { }, }, } - s.manager.planMgr.AppendLayer(origLayer) + s.planMgr.Load() + s.planMgr.AppendLayer(origLayer, false) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, }) @@ -695,7 +705,8 @@ func (s *ManagerSuite) TestStopChecks(c *C) { }, }, } - s.manager.planMgr.AppendLayer(origLayer) + s.planMgr.Load() + s.planMgr.AppendLayer(origLayer, false) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, @@ -742,7 +753,8 @@ func (s *ManagerSuite) TestStopChecksNotFound(c *C) { }, }, } - s.manager.planMgr.AppendLayer(origLayer) + s.planMgr.Load() + s.planMgr.AppendLayer(origLayer, false) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, }) @@ -779,7 +791,8 @@ func (s *ManagerSuite) TestReplan(c *C) { }, }, } - s.manager.planMgr.AppendLayer(origLayer) + s.planMgr.Load() + s.planMgr.AppendLayer(origLayer, false) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, From a890fd315d7b419b1d3b5d4af779e62083d8ede2 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Wed, 5 Feb 2025 18:44:31 +1300 Subject: [PATCH 41/47] More attempts. --- internals/overlord/checkstate/manager_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 0b6fea725..092150ac0 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -65,8 +65,9 @@ func (s *ManagerSuite) SetUpTest(c *C) { err = os.Mkdir(layersDir, 0755) c.Assert(err, IsNil) s.planMgr, err = planstate.NewManager(layersDir) - s.overlord.AddManager(s.planMgr) c.Assert(err, IsNil) + s.planMgr.AddChangeListener(s.manager.PlanChanged) + s.overlord.AddManager(s.planMgr) s.manager = checkstate.NewManager(s.overlord.State(), s.overlord.TaskRunner(), s.planMgr) s.overlord.AddManager(s.manager) s.overlord.AddManager(s.overlord.TaskRunner()) From f9250f69a92f297fb7f1bfbe16b145605ca71262 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 10 Feb 2025 12:40:52 +1300 Subject: [PATCH 42/47] Explicitly set the change status to abort (this is clearer than hold, which it otherwise is). --- internals/overlord/checkstate/manager.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internals/overlord/checkstate/manager.go b/internals/overlord/checkstate/manager.go index 45b07fe5d..0d0e067c5 100644 --- a/internals/overlord/checkstate/manager.go +++ b/internals/overlord/checkstate/manager.go @@ -479,6 +479,7 @@ func (m *CheckManager) StopChecks(checks []string) (stopped []string, err error) change := m.state.Change(info.ChangeID) if change != nil { change.Abort() + change.SetStatus(state.AbortStatus) stopped = append(stopped, check.Name) } // We pass in the current number of failures so that it remains the From 907d213e83d85655731ab3feb365763e14d00aeb Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 10 Feb 2025 12:41:08 +1300 Subject: [PATCH 43/47] Fix tests. --- internals/overlord/checkstate/manager_test.go | 72 ++++++++++++++----- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 092150ac0..0a5aab44b 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -66,9 +66,9 @@ func (s *ManagerSuite) SetUpTest(c *C) { c.Assert(err, IsNil) s.planMgr, err = planstate.NewManager(layersDir) c.Assert(err, IsNil) - s.planMgr.AddChangeListener(s.manager.PlanChanged) s.overlord.AddManager(s.planMgr) s.manager = checkstate.NewManager(s.overlord.State(), s.overlord.TaskRunner(), s.planMgr) + s.planMgr.AddChangeListener(s.manager.PlanChanged) s.overlord.AddManager(s.manager) s.overlord.AddManager(s.overlord.TaskRunner()) err = s.overlord.StartUp() @@ -88,12 +88,14 @@ func (s *ManagerSuite) TestChecks(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, "chk2": { Name: "chk2", + Override: "replace", Level: "alive", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, @@ -101,6 +103,7 @@ func (s *ManagerSuite) TestChecks(c *C) { }, "chk3": { Name: "chk3", + Override: "replace", Level: "ready", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, @@ -121,6 +124,7 @@ func (s *ManagerSuite) TestChecks(c *C) { Checks: map[string]*plan.Check{ "chk4": { Name: "chk4", + Override: "merge", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk4"}, @@ -139,6 +143,7 @@ func (s *ManagerSuite) TestTimeout(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Millisecond}, Timeout: plan.OptionalDuration{Value: 25 * time.Millisecond}, Threshold: 1, @@ -180,6 +185,7 @@ func (s *ManagerSuite) TestCheckCanceled(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Millisecond}, Timeout: plan.OptionalDuration{Value: time.Second}, Threshold: 1, @@ -228,6 +234,7 @@ func (s *ManagerSuite) TestFailures(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: 20 * time.Millisecond}, Timeout: plan.OptionalDuration{Value: 100 * time.Millisecond}, Threshold: 3, @@ -299,6 +306,7 @@ func (s *ManagerSuite) TestFailuresBelowThreshold(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: 20 * time.Millisecond}, Timeout: plan.OptionalDuration{Value: 100 * time.Millisecond}, Threshold: 3, @@ -331,18 +339,21 @@ func (s *ManagerSuite) TestPlanChangedSmarts(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, "chk2": { Name: "chk2", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk2"}, }, "chk3": { Name: "chk3", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk3"}, @@ -368,12 +379,14 @@ func (s *ManagerSuite) TestPlanChangedSmarts(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, "chk2": { Name: "chk2", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 6, Exec: &plan.ExecCheck{Command: "echo chk2 modified"}, @@ -402,11 +415,13 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { Services: map[string]*plan.Service{ "svc1": { Name: "svc1", + Override: "replace", Command: "dummy1", WorkingDir: "/tmp", }, "svc2": { Name: "svc2", + Override: "replace", Command: "dummy2", WorkingDir: "/tmp", }, @@ -414,6 +429,7 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{ @@ -423,6 +439,7 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { }, "chk2": { Name: "chk2", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{ @@ -452,6 +469,7 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { "svc1": origPlan.Services["svc1"], "svc2": { Name: "svc2", + Override: "merge", Command: "dummy2", WorkingDir: c.MkDir(), }, @@ -486,6 +504,7 @@ func (s *ManagerSuite) TestSuccessNoLog(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: 10 * time.Millisecond}, Timeout: plan.OptionalDuration{Value: time.Second}, Threshold: 3, @@ -599,12 +618,14 @@ func (s *ManagerSuite) TestStartChecks(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, "chk2": { Name: "chk2", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk2"}, @@ -612,6 +633,7 @@ func (s *ManagerSuite) TestStartChecks(c *C) { }, "chk3": { Name: "chk3", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk3"}, @@ -619,8 +641,9 @@ func (s *ManagerSuite) TestStartChecks(c *C) { }, }, } - s.planMgr.Load() - s.planMgr.AppendLayer(origLayer, false) + err := s.planMgr.AppendLayer(origLayer, false) + c.Assert(err, IsNil) + s.manager.PlanChanged(s.planMgr.Plan()) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, @@ -650,10 +673,10 @@ func (s *ManagerSuite) TestStartChecks(c *C) { // chk2's new Change should be a running perform-check. st := s.overlord.State() st.Lock() - change := st.Change(originalChangeIDs[1]) + change := st.Change(checks[1].ChangeID) status := change.Status() st.Unlock() - c.Assert(status, Equals, state.DoingStatus) + c.Assert(status, Equals, state.DoStatus) c.Assert(change.Kind(), Equals, "perform-check") } @@ -662,14 +685,15 @@ func (s *ManagerSuite) TestStartChecksNotFound(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, }, } - s.planMgr.Load() - s.planMgr.AppendLayer(origLayer, false) + err := s.planMgr.AppendLayer(origLayer, false) + c.Assert(err, IsNil) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, }) @@ -686,12 +710,14 @@ func (s *ManagerSuite) TestStopChecks(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, "chk2": { Name: "chk2", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk2"}, @@ -699,6 +725,7 @@ func (s *ManagerSuite) TestStopChecks(c *C) { }, "chk3": { Name: "chk3", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk3"}, @@ -706,8 +733,8 @@ func (s *ManagerSuite) TestStopChecks(c *C) { }, }, } - s.planMgr.Load() - s.planMgr.AppendLayer(origLayer, false) + err := s.planMgr.AppendLayer(origLayer, false) + c.Assert(err, IsNil) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, @@ -730,14 +757,14 @@ func (s *ManagerSuite) TestStopChecks(c *C) { c.Assert(changed, DeepEquals, []string{"chk1"}) checks, err = s.manager.Checks() c.Assert(err, IsNil) - // chk1 and chk3 should still have the same change ID, chk2 should not have one. - c.Assert(checks[0].ChangeID, Equals, originalChangeIDs[0]) + // chk3 should still have the same change ID, chk1 and chk2 should not have one. + c.Assert(checks[0].ChangeID, Equals, "") c.Assert(checks[1].ChangeID, Equals, "") c.Assert(checks[2].ChangeID, Equals, originalChangeIDs[2]) - // chk2's old Change should have aborted. + // chk1's old Change should have aborted. st := s.overlord.State() st.Lock() - change := st.Change(originalChangeIDs[1]) + change := st.Change(originalChangeIDs[0]) status := change.Status() st.Unlock() c.Assert(status, Equals, state.AbortStatus) @@ -748,14 +775,15 @@ func (s *ManagerSuite) TestStopChecksNotFound(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, }, } - s.planMgr.Load() - s.planMgr.AppendLayer(origLayer, false) + err := s.planMgr.AppendLayer(origLayer, false) + c.Assert(err, IsNil) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, }) @@ -772,12 +800,14 @@ func (s *ManagerSuite) TestReplan(c *C) { Checks: map[string]*plan.Check{ "chk1": { Name: "chk1", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk1"}, }, "chk2": { Name: "chk2", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk2"}, @@ -785,6 +815,7 @@ func (s *ManagerSuite) TestReplan(c *C) { }, "chk3": { Name: "chk3", + Override: "replace", Period: plan.OptionalDuration{Value: time.Second}, Threshold: 3, Exec: &plan.ExecCheck{Command: "echo chk3"}, @@ -792,8 +823,8 @@ func (s *ManagerSuite) TestReplan(c *C) { }, }, } - s.planMgr.Load() - s.planMgr.AppendLayer(origLayer, false) + err := s.planMgr.AppendLayer(origLayer, false) + c.Assert(err, IsNil) waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, @@ -811,7 +842,9 @@ func (s *ManagerSuite) TestReplan(c *C) { originalChangeIDs = append(originalChangeIDs, check.ChangeID) } + s.overlord.State().Lock() s.manager.Replan() + s.overlord.State().Unlock() waitChecks(c, s.manager, []*checkstate.CheckInfo{ {Name: "chk1", Startup: "enabled", Status: "up", Threshold: 3}, {Name: "chk2", Startup: "disabled", Status: "inactive", Threshold: 3}, @@ -828,9 +861,10 @@ func (s *ManagerSuite) TestReplan(c *C) { // chk1's new Change should be a running perform-check. st := s.overlord.State() st.Lock() - change := st.Change(originalChangeIDs[0]) + change := st.Change(checks[0].ChangeID) + c.Assert(change, NotNil) status := change.Status() st.Unlock() - c.Assert(status, Equals, state.DoingStatus) + c.Assert(status, Equals, state.DoStatus) c.Assert(change.Kind(), Equals, "perform-check") } From 605a7fe783e04dbb25e354e4bc471c31eaec2b57 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 10 Feb 2025 12:43:56 +1300 Subject: [PATCH 44/47] Linting fix. --- internals/overlord/checkstate/manager_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 0a5aab44b..82c03ab11 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -415,13 +415,13 @@ func (s *ManagerSuite) TestPlanChangedServiceContext(c *C) { Services: map[string]*plan.Service{ "svc1": { Name: "svc1", - Override: "replace", + Override: "replace", Command: "dummy1", WorkingDir: "/tmp", }, "svc2": { Name: "svc2", - Override: "replace", + Override: "replace", Command: "dummy2", WorkingDir: "/tmp", }, From 92009582d06a946e8fb786617fae591f2485718c Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 10 Feb 2025 13:12:17 +1300 Subject: [PATCH 45/47] Allow the status to be do or doing. --- internals/overlord/checkstate/manager_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 82c03ab11..49e7b6002 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -676,7 +676,7 @@ func (s *ManagerSuite) TestStartChecks(c *C) { change := st.Change(checks[1].ChangeID) status := change.Status() st.Unlock() - c.Assert(status, Equals, state.DoStatus) + c.Assert(status, Matches, "Do.*") c.Assert(change.Kind(), Equals, "perform-check") } @@ -865,6 +865,6 @@ func (s *ManagerSuite) TestReplan(c *C) { c.Assert(change, NotNil) status := change.Status() st.Unlock() - c.Assert(status, Equals, state.DoStatus) + c.Assert(status, Matches, "Do.*") c.Assert(change.Kind(), Equals, "perform-check") } From ddca37fa7c0db0d40a3fb29299b225f7057d147f Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 10 Feb 2025 15:02:25 +1300 Subject: [PATCH 46/47] Update internals/overlord/checkstate/manager_test.go Co-authored-by: Ben Hoyt --- internals/overlord/checkstate/manager_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 49e7b6002..0407674e9 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -699,9 +699,9 @@ func (s *ManagerSuite) TestStartChecksNotFound(c *C) { }) changed, err := s.manager.StartChecks([]string{"chk1", "chk2"}) - _, ok := err.(*checkstate.ChecksNotFound) + notFoundErr, ok := err.(*checkstate.ChecksNotFound) c.Assert(ok, Equals, true) - c.Assert(err.(*checkstate.ChecksNotFound).Names, DeepEquals, []string{"chk2"}) + c.Assert(notFoundErr.Names, DeepEquals, []string{"chk2"}) c.Assert(changed, IsNil) } From 713eb7e8b3fa3dd9e2ca23c9fce3e3c28da349f8 Mon Sep 17 00:00:00 2001 From: Tony Meyer Date: Mon, 10 Feb 2025 15:05:49 +1300 Subject: [PATCH 47/47] Tweak as per review. --- internals/overlord/checkstate/manager_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/internals/overlord/checkstate/manager_test.go b/internals/overlord/checkstate/manager_test.go index 0407674e9..9c50926d4 100644 --- a/internals/overlord/checkstate/manager_test.go +++ b/internals/overlord/checkstate/manager_test.go @@ -15,6 +15,7 @@ package checkstate_test import ( + "errors" "fmt" "os" "path/filepath" @@ -699,8 +700,8 @@ func (s *ManagerSuite) TestStartChecksNotFound(c *C) { }) changed, err := s.manager.StartChecks([]string{"chk1", "chk2"}) - notFoundErr, ok := err.(*checkstate.ChecksNotFound) - c.Assert(ok, Equals, true) + var notFoundErr *checkstate.ChecksNotFound + c.Assert(errors.As(err, ¬FoundErr), Equals, true) c.Assert(notFoundErr.Names, DeepEquals, []string{"chk2"}) c.Assert(changed, IsNil) } @@ -789,9 +790,9 @@ func (s *ManagerSuite) TestStopChecksNotFound(c *C) { }) changed, err := s.manager.StopChecks([]string{"chk1", "chk2"}) - _, ok := err.(*checkstate.ChecksNotFound) - c.Assert(ok, Equals, true) - c.Assert(err.(*checkstate.ChecksNotFound).Names, DeepEquals, []string{"chk2"}) + var notFoundErr *checkstate.ChecksNotFound + c.Assert(errors.As(err, ¬FoundErr), Equals, true) + c.Assert(notFoundErr.Names, DeepEquals, []string{"chk2"}) c.Assert(changed, IsNil) }