Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce APIError for Sync Calls from CLI to Orchestrator Node #4366

Merged
merged 18 commits into from
Sep 11, 2024
Merged
4 changes: 3 additions & 1 deletion cmd/cli/job/describe.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package job

import (
"cmp"
"errors"
"fmt"
"slices"
"time"
Expand Down Expand Up @@ -82,13 +83,14 @@ func NewDescribeCmd() *cobra.Command {
func (o *DescribeOptions) run(cmd *cobra.Command, args []string, api client.API) error {
ctx := cmd.Context()
jobID := args[0]

response, err := api.Jobs().Get(ctx, &apimodels.GetJobRequest{
JobID: jobID,
Include: "executions,history",
})

if err != nil {
return fmt.Errorf("could not get job %s: %w", jobID, err)
return errors.New(err.Error())
wdbaruni marked this conversation as resolved.
Show resolved Hide resolved
}

if o.OutputOpts.Format != "" {
Expand Down
3 changes: 2 additions & 1 deletion cmd/cli/job/executions.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package job

import (
"errors"
"fmt"
"strconv"
"time"
Expand Down Expand Up @@ -147,7 +148,7 @@ func (o *ExecutionOptions) run(cmd *cobra.Command, args []string, api client.API
},
})
if err != nil {
return err
return errors.New(err.Error())
}

if err = output.Output(cmd, executionColumns, o.OutputOptions, response.Items); err != nil {
Expand Down
3 changes: 2 additions & 1 deletion cmd/cli/job/get.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package job

import (
"errors"
"fmt"
"strings"

Expand Down Expand Up @@ -108,7 +109,7 @@ func get(cmd *cobra.Command, cmdArgs []string, api client.API, cfg types.Bacalha
jobID,
OG.DownloadSettings,
); err != nil {
return fmt.Errorf("downloading job: %w", err)
return errors.New(err.Error())
}

return nil
Expand Down
3 changes: 2 additions & 1 deletion cmd/cli/job/history.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package job

import (
"errors"
"fmt"
"time"

Expand Down Expand Up @@ -160,7 +161,7 @@ func (o *HistoryOptions) run(cmd *cobra.Command, args []string, api client.API)
},
})
if err != nil {
return err
return errors.New(err.Error())
}

if err = output.Output(cmd, historyColumns, o.OutputOptions, response.Items); err != nil {
Expand Down
3 changes: 2 additions & 1 deletion cmd/util/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package util
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -33,7 +34,7 @@ func DownloadResultsHandler(
JobID: jobID,
})
if err != nil {
Fatal(cmd, fmt.Errorf("could not get results for job %s: %w", jobID, err), 1)
return errors.New(err.Error())
}

if len(response.Items) == 0 {
Expand Down
7 changes: 4 additions & 3 deletions pkg/jobstore/boltdb/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/bacalhau-project/bacalhau/pkg/lib/marshaller"
"github.com/bacalhau-project/bacalhau/pkg/lib/math"
"github.com/bacalhau-project/bacalhau/pkg/models"
"github.com/bacalhau-project/bacalhau/pkg/publicapi/apimodels"
"github.com/bacalhau-project/bacalhau/pkg/util"
"github.com/bacalhau-project/bacalhau/pkg/util/idgen"
)
Expand Down Expand Up @@ -176,7 +177,7 @@ func (b *BoltJobStore) getJob(tx *bolt.Tx, jobID string) (models.Job, error) {

data := GetBucketData(tx, NewBucketPath(BucketJobs, jobID), SpecKey)
if data == nil {
return job, bacerrors.NewJobNotFound(jobID)
return job, apimodels.NewJobNotFound(jobID)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it doesn't sound right that internal and backend components depend on apimodels which are meant for the http API between frontend (api server) and clients.

Some suggestions:

  1. We should be able to define internal errors separate from the APIs, and then implement interceptors that can translate internal errors to http errors
  2. At the same time we should avoid having to translate each individual error to http format. So maybe having an interface that each of these errors implement, or a common underlying error type can simplify the translation
  3. We should avoid having a central pkg that defines all internal errors in the system like what we have today with bacerror

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, please let me know your thoughts about it.

}

err = b.marshaller.Unmarshal(data, &job)
Expand All @@ -202,7 +203,7 @@ func (b *BoltJobStore) reifyJobID(tx *bolt.Tx, jobID string) (string, error) {

switch len(found) {
case 0:
return "", bacerrors.NewJobNotFound(jobID)
return "", apimodels.NewJobNotFound(jobID)
case 1:
return found[0], nil
default:
Expand Down Expand Up @@ -909,7 +910,7 @@ func (b *BoltJobStore) deleteJob(tx *bolt.Tx, jobID string) error {

job, err := b.getJob(tx, jobID)
if err != nil {
return bacerrors.NewJobNotFound(jobID)
apimodels.NewJobNotFound(jobID)
}

tx.OnCommit(func() {
Expand Down
131 changes: 131 additions & 0 deletions pkg/publicapi/apimodels/error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package apimodels

import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
)

// apierror represents a standardized error response for the api.
//
// it encapsulates:
// - an http status code
// - a human-readable error message
//
// purpose:
// - primarily used to send synchronous errors from the orchestrator endpoint
// - provides a structured json error response to http clients
//
// usage:
// - when the cli interacts with an orchestrator node via an http client:
// 1. the http client receives this structured json error
// 2. the human-readable message is output to stdout
// 3. the http status code allows for programmatic handling of different error types
//
// benefits:
// - ensures consistent error formatting across the api
// - facilitates both user-friendly error messages and machine-readable error codes
type APIError struct {
// httpstatuscode is the http status code associated with this error.
// it should correspond to standard http status codes (e.g., 400, 404, 500).
HTTPStatusCode int `json:"code"`

// message is a short, human-readable description of the error.
// it should be concise and provide a clear indication of what went wrong.
Message string `json:"message"`
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also return:

  • RequestID to help debug and grep server logs for the failed request
  • ErrorCode that uniquely identify the error error without having to parse the message, similar to AWS

Keep in mind that some of this information are already in the http headers, such as status code and request id. It might be redundant to serialize them in the APIError. Maybe we should omit serializing them, and just have the client set those values from the headers.

Copy link
Contributor Author

@udsamani udsamani Sep 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the RequestID. However, adding a bacalhau specific error code is tricky at this point. Mainly because we are need to make sure that we properly propagate error codes correctly every where. In addition, we need to think about how to formulate error codes for errors that occur before even we reach to bacalhau specific components, for example echo server middlewares.

I think we should pick that up in follow ups. Let me know what you think

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah please feel free to do this in a follow up PR. What I had in mind is something similar to the code below. The idea is to have standard error codes that can be used by our core components and even the modules if the errors apply, while still enabling the modules to define their own specific error code, such as BucketNotFound and ContainerStartFailed

The HTTPCode can be explicitly provided, inferred from the error code if it is a standard one, or fallback to 500.

What I really liked in your PR is the introduction of Component field which helps identify the source of the error and also removes the need for these error codes to be globally unique. I would just make the components more readable and descriptive, such as S3Publisher, Docker instead of abbreviations. To simplify things, I am proposing the default component to be Bacalhau, which doesn't mean much, but will allow us to incrementally improve the coverage of better errors.

Also as we progress, we should allow components to override the Component piece of the errors. Meaning a job not found error with JobStore component doesn't help much compared to knowing the component that actually called the job store. We don't need to do these changes today, but just thinking ahead.

What do you think?

const (
    BadRequest ErrorCode = "BadRequest"
    Unauthorized ErrorCode = "Unauthorized"
    ResourceNotFound ErrorCode = "ResourceNotFound"
    ResourceInUse ErrorCode = "ResourceInUse"
    ValidationFailed ErrorCode = "ValidationFailed"
    VersionMismatch ErrorCode = "VersionMismatch"
    TooManyRequests ErrorCode = "TooManyRequests"
    
    InternalError ErrorCode = "InternalError"
    NotImplemented ErrorCode = "NotImplemented"
    ServiceUnavailable ErrorCode = "ServiceUnavailable"
    DatastoreFailure ErrorCode = "DatastoreFailure"
    NetworkFailure ErrorCode = "NetworkFailure"
    ConfigurationError ErrorCode = "ConfigurationError"
    ResourceExhausted ErrorCode = "ResourceExhausted"
)

// Note: Component-specific error codes should be defined in their respective packages.
// Examples might include:
// - BucketNotFound
// - ObjectNotFound
// - InvalidCredentials
// - ImageNotFound
// - ContainerStartFailed
// - PeerConnectionFailed

type BaseError struct {
    Code           ErrorCode         `json:"code"`
    Message        string            `json:"message"`
    Component      string            `json:"component"`
    HTTPCode       int               `json:"http_code"`
    Hint           string            `json:"hint,omitempty"`
    Retryable      bool              `json:"retryable"`
    FailsExecution bool              `json:"fails_execution"`
    Details        map[string]string `json:"details,omitempty"`
}

func NewBaseError(format string, a ...any) *BaseError {
    return &BaseError{
        Code:      InternalError,
        Message:   fmt.Sprintf(format, a...),
        Component: "Bacalhau",
        HTTPCode:  0, // Will be inferred later if not set explicitly
    }
}

func (e *BaseError) Error() string {
    return fmt.Sprintf("[%s] %s: %s", e.Code, e.Component, e.Message)
}

func (e *BaseError) WithErrorCode(code ErrorCode) *BaseError {
    e.Code = code
    return e
}

func (e *BaseError) WithHTTPCode(httpCode int) *BaseError {
    e.HTTPCode = httpCode
    return e
}

func (e *BaseError) HTTPStatusCode() int {
    if e.HTTPCode != 0 {
        return e.HTTPCode
    }
    return inferHTTPStatusCode(e.Code)
}

func inferHTTPStatusCode(code ErrorCode) int {
    switch code {
    case BadRequest, ValidationFailed:
        return http.StatusBadRequest
    case Unauthorized:
        return http.StatusUnauthorized
    case ResourceNotFound:
        return http.StatusNotFound
    case ResourceInUse, VersionMismatch:
        return http.StatusConflict
    case TooManyRequests:
        return http.StatusTooManyRequests
    case NotImplemented:
        return http.StatusNotImplemented
    case ServiceUnavailable:
        return http.StatusServiceUnavailable
    default:
        return http.StatusInternalServerError
    }
}

// rest of the methods


// NewAPIError creates a new APIError with the given HTTP status code and message.
func NewAPIError(statusCode int, message string) *APIError {
return &APIError{
HTTPStatusCode: statusCode,
Message: message,
}
}

// NewBadRequestError creates an APIError for Bad Request (400) errors.
func NewBadRequestError(message string) *APIError {
return NewAPIError(http.StatusBadRequest, message)
}

// NewUnauthorizedError creates an APIError for Unauthorized (401) errors.
func NewUnauthorizedError(message string) *APIError {
return NewAPIError(http.StatusUnauthorized, message)
}

// NewForbiddenError creates an APIError for Forbidden (403) errors.
func NewForbiddenError(message string) *APIError {
return NewAPIError(http.StatusForbidden, message)
}

// NewNotFoundError creates an APIError for Not Found (404) errors.
func NewNotFoundError(message string) *APIError {
return NewAPIError(http.StatusNotFound, message)
}

// NewConflictError creates an APIError for Conflict (409) errors.
func NewConflictError(message string) *APIError {
return NewAPIError(http.StatusConflict, message)
}

// NewInternalServerError creates an APIError for Internal Server Error (500) errors.
func NewInternalServerError(message string) *APIError {
return NewAPIError(http.StatusInternalServerError, message)
}

func NewJobNotFound(jobID string) *APIError {
return NewAPIError(http.StatusNotFound, fmt.Sprintf("job id %s not found", jobID))
}

// IsNotFound checks if the error is an APIError with a Not Found status.
func IsNotFound(err error) bool {
apiErr, ok := err.(*APIError)
return ok && apiErr.HTTPStatusCode == http.StatusNotFound
}

// IsBadRequest checks if the error is an APIError with a Bad Request status.
func IsBadRequest(err error) bool {
apiErr, ok := err.(*APIError)
return ok && apiErr.HTTPStatusCode == http.StatusBadRequest
}

// IsInternalServerError checks if the error is an APIError with an Internal Server Error status.
func IsInternalServerError(err error) bool {
apiErr, ok := err.(*APIError)
return ok && apiErr.HTTPStatusCode == http.StatusInternalServerError
}

// Error implements the error interface, allowing APIError to be used as a standard Go error.
func (e *APIError) Error() string {
return e.Message
}

// Parse HTTP Resposne to APIError
func FromHttpResponse(resp *http.Response) (*APIError, error) {

if resp == nil {
return nil, errors.New("response is nil, cannot be unmarsheld to APIError")
}

defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("error reading response body: %w")
}

var apiErr APIError
err = json.Unmarshal(body, &apiErr)
if err != nil {
return nil, fmt.Errorf("error parsing response body: %w", err)
}

// If the JSON didn't include a status code, use the HTTP Status
if apiErr.HTTPStatusCode == 0 {
apiErr.HTTPStatusCode = resp.StatusCode
}

return &apiErr, nil
}
23 changes: 18 additions & 5 deletions pkg/publicapi/client/v2/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,25 @@ type httpClient struct {
// and deserialize the response into a response object
func (c *httpClient) Get(ctx context.Context, endpoint string, in apimodels.GetRequest, out apimodels.GetResponse) error {
r := in.ToHTTPRequest()
_, resp, err := requireOK(c.doRequest(ctx, http.MethodGet, endpoint, r)) //nolint:bodyclose // this is being closed
if err != nil && resp != nil && resp.StatusCode == http.StatusUnauthorized {
return apimodels.ErrInvalidToken
} else if err != nil {
return err

_, resp, err := c.doRequest(ctx, http.MethodGet, endpoint, r) //nolint:bodyclose // this is being closed

if resp.StatusCode == http.StatusUnauthorized {
return apimodels.NewUnauthorizedError("invalid token")
}

var apiError *apimodels.APIError
if resp.StatusCode != http.StatusOK {
apiError, err = apimodels.FromHttpResponse(resp)
if err != nil {
return err
}
}

if apiError != nil {
return apiError
}

defer resp.Body.Close()

if out != nil {
Expand Down
7 changes: 0 additions & 7 deletions pkg/publicapi/client/v2/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,6 @@ func requireOK(d time.Duration, resp *http.Response, e error) (time.Duration, *h
// response codes and validates that the received response code is among them
func requireStatusIn(statuses ...int) doRequestWrapper {
return func(d time.Duration, resp *http.Response, e error) (time.Duration, *http.Response, error) {
if e != nil {
if resp != nil {
_ = resp.Body.Close()
}
return d, nil, e
}

for _, status := range statuses {
if resp.StatusCode == status {
return d, resp, nil
Expand Down
2 changes: 2 additions & 0 deletions pkg/publicapi/endpoint/orchestrator/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ func (e *Endpoint) getJob(c echo.Context) error { //nolint: gocyclo
if err := c.Bind(&args); err != nil {
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
}
log.Info().Msg("UDIT SUCKS")
udsamani marked this conversation as resolved.
Show resolved Hide resolved
job, err := e.store.GetJob(ctx, jobID)
if err != nil {
log.Error().Err(err)
udsamani marked this conversation as resolved.
Show resolved Hide resolved
return err
}
response := apimodels.GetJobResponse{
Expand Down
47 changes: 47 additions & 0 deletions pkg/publicapi/middleware/error_handler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package middleware

import (
"net/http"

"github.com/bacalhau-project/bacalhau/pkg/publicapi/apimodels"
"github.com/labstack/echo/v4"
"github.com/rs/zerolog/log"
)

func CustomHTTPErrorHandler(err error, c echo.Context) {

var code int
var message string

switch e := err.(type) {

case *apimodels.APIError:
// If it is already our custom APIError, use its code and message
code = e.HTTPStatusCode
message = e.Message

default:
// In an ideal world this should never happen. We should always have are errors
// from server as APIError. If output is this generic string, one should evaluate
// and map it to APIError and send in appropriate message.= http.StatusInternalServerError
message = "internal server error"
}
wdbaruni marked this conversation as resolved.
Show resolved Hide resolved

// Don't override the status code if it is already been set.
// This is something that is advised by ECHO framework.
if !c.Response().Committed {

if c.Request().Method == http.MethodHead {
err = c.NoContent(code)
} else {
err = c.JSON(code, apimodels.APIError{
HTTPStatusCode: code,
Message: message,
})
}
if err != nil {
log.Info().Msg("unable to send json response while handling error.")
}
}

}
4 changes: 4 additions & 0 deletions pkg/publicapi/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ func NewAPIServer(params ServerParams) (*Server, error) {
middleware.VersionNotifyLogger(middlewareLogger, *serverVersion),
)

// Add custom http error handler. This is a centralized error handler for
// the server
server.Router.HTTPErrorHandler = middleware.CustomHTTPErrorHandler

var tlsConfig *tls.Config
if params.AutoCertDomain != "" {
log.Ctx(context.TODO()).Debug().Msgf("Setting up auto-cert for %s", params.AutoCertDomain)
Expand Down
Loading
Loading