From 764831dcbfe2f8d811b8dcc67363394960359427 Mon Sep 17 00:00:00 2001 From: Jamil Shamy <4977827+jamlo@users.noreply.github.com> Date: Tue, 8 Oct 2024 10:51:16 -0400 Subject: [PATCH 1/2] Improve API error handling (#4607) This PR tries to fix the "FromHttpResponse" function. Returning 2 errors from a function can be confusing and uncommon in Golang, especially if the function only returns these 2 errors. The new code will return one error, which is always an APIError, and it also improves the error messaging wording, adding more details in the output. Several more tweaks can be done to the current error message handling if needed. Note: this is identical to [PR 4565](https://github.com/bacalhau-project/bacalhau/pull/4565), though created from a branch, and not a fork. --- pkg/publicapi/apimodels/error.go | 19 ++++++++++++------- pkg/publicapi/client/v2/client.go | 27 ++++++++------------------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/pkg/publicapi/apimodels/error.go b/pkg/publicapi/apimodels/error.go index 6d2bc98bfd..842fe5734e 100644 --- a/pkg/publicapi/apimodels/error.go +++ b/pkg/publicapi/apimodels/error.go @@ -2,7 +2,6 @@ package apimodels import ( "encoding/json" - "errors" "fmt" "io" "net/http" @@ -67,23 +66,29 @@ func (e *APIError) Error() string { } // Parse HTTP Resposne to APIError -func FromHttpResponse(resp *http.Response) (*APIError, error) { - +func GenerateAPIErrorFromHTTPResponse(resp *http.Response) *APIError { if resp == nil { - return nil, errors.New("response is nil, cannot be unmarsheld to APIError") + return NewAPIError(0, "API call error, invalid response") } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { - return nil, fmt.Errorf("error reading response body: %w", err) + return NewAPIError( + resp.StatusCode, + fmt.Sprintf("Unable to read API call response body. 
Error: %q", err.Error())) } var apiErr APIError err = json.Unmarshal(body, &apiErr) if err != nil { - return nil, fmt.Errorf("error parsing response body: %w", err) + return NewAPIError( + resp.StatusCode, + fmt.Sprintf("Unable to parse API call response body. Error: %q. Body received: %q", + err.Error(), + string(body), + )) } // If the JSON didn't include a status code, use the HTTP Status @@ -91,7 +96,7 @@ func FromHttpResponse(resp *http.Response) (*APIError, error) { apiErr.HTTPStatusCode = resp.StatusCode } - return &apiErr, nil + return &apiErr } // FromBacError converts a bacerror.Error to an APIError diff --git a/pkg/publicapi/client/v2/client.go b/pkg/publicapi/client/v2/client.go index 18f687ed34..95979028be 100644 --- a/pkg/publicapi/client/v2/client.go +++ b/pkg/publicapi/client/v2/client.go @@ -74,18 +74,12 @@ func (c *httpClient) Get(ctx context.Context, endpoint string, in apimodels.GetR return apimodels.NewUnauthorizedError("invalid token") } - var apiError *apimodels.APIError if resp.StatusCode != http.StatusOK { - apiError, err = apimodels.FromHttpResponse(resp) - if err != nil { - return err + if apiError := apimodels.GenerateAPIErrorFromHTTPResponse(resp); apiError != nil { + return apiError } } - if apiError != nil { - return apiError - } - defer resp.Body.Close() if out != nil { @@ -116,18 +110,12 @@ func (c *httpClient) write(ctx context.Context, verb, endpoint string, in apimod return apimodels.ErrInvalidToken } - var apiError *apimodels.APIError if resp.StatusCode != http.StatusOK { - apiError, err = apimodels.FromHttpResponse(resp) - if err != nil { - return err + if apiError := apimodels.GenerateAPIErrorFromHTTPResponse(resp); apiError != nil { + return apiError } } - if apiError != nil { - return apiError - } - if out != nil { if err := decodeBody(resp, &out); err != nil { return err @@ -362,12 +350,13 @@ func (c *httpClient) interceptError(ctx context.Context, err error, resp *http.R WithCode(bacerrors.UnauthorizedError) } - apiError, 
apiErr := apimodels.FromHttpResponse(resp) - if apiErr == nil { + apiError := apimodels.GenerateAPIErrorFromHTTPResponse(resp) + if apiError != nil { return apiError.ToBacError() } - return bacerrors.Wrap(apiErr, "server error"). + return bacerrors.New("server error"). + WithHTTPStatusCode(http.StatusInternalServerError). WithCode(bacerrors.InternalError) } From 54e8e0771fc668865e748916d70d054ebc0cd158 Mon Sep 17 00:00:00 2001 From: Jamil Shamy <4977827+jamlo@users.noreply.github.com> Date: Wed, 9 Oct 2024 07:42:50 -0400 Subject: [PATCH 2/2] Support running Bacalhau in Docker compose (#4596) This pull requests addresses issue #4595 --- .pre-commit-config.yaml | 2 +- test-integration/Dockerfile-ClientNode | 27 +++ test-integration/Dockerfile-ComputeNode | 24 +++ .../Dockerfile-DockerImageRegistryNode | 24 +++ test-integration/Dockerfile-RequesterNode | 22 ++ test-integration/README.md | 198 ++++++++++++++++++ test-integration/certificates/README.md | 9 + .../certificates/generate_leaf_certs.sh | 71 +++++++ .../certificates/generate_root_ca.sh | 29 +++ .../bacalhau-container-img-registry-node.crt | 33 +++ .../bacalhau-container-img-registry-node.key | 52 +++++ .../bacalhau_test_root_ca.crt | 31 +++ .../bacalhau_test_root_ca.key | 52 +++++ test-integration/compute_node_image_setup.sh | 38 ++++ test-integration/docker-compose.yml | 117 +++++++++++ 15 files changed, 728 insertions(+), 1 deletion(-) create mode 100644 test-integration/Dockerfile-ClientNode create mode 100644 test-integration/Dockerfile-ComputeNode create mode 100644 test-integration/Dockerfile-DockerImageRegistryNode create mode 100644 test-integration/Dockerfile-RequesterNode create mode 100644 test-integration/README.md create mode 100644 test-integration/certificates/README.md create mode 100755 test-integration/certificates/generate_leaf_certs.sh create mode 100755 test-integration/certificates/generate_root_ca.sh create mode 100644 
test-integration/certificates/generated_assets/bacalhau-container-img-registry-node.crt create mode 100644 test-integration/certificates/generated_assets/bacalhau-container-img-registry-node.key create mode 100644 test-integration/certificates/generated_assets/bacalhau_test_root_ca.crt create mode 100644 test-integration/certificates/generated_assets/bacalhau_test_root_ca.key create mode 100755 test-integration/compute_node_image_setup.sh create mode 100644 test-integration/docker-compose.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ee7d6b8d4a..ee46828daa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: - id: detect-aws-credentials args: [--allow-missing-credentials] - id: detect-private-key - exclude: testdata/.* + exclude: 'testdata/.*|test-integration/certificates/.*' - id: check-yaml - id: check-json - repo: https://github.com/astral-sh/ruff-pre-commit diff --git a/test-integration/Dockerfile-ClientNode b/test-integration/Dockerfile-ClientNode new file mode 100644 index 0000000000..da31f340f7 --- /dev/null +++ b/test-integration/Dockerfile-ClientNode @@ -0,0 +1,27 @@ +# Use the docker:dind image as the base image +FROM docker:dind + +# Set the working directory +WORKDIR /app + +# Install curl and bash +RUN apk update && apk add --no-cache curl bash + +# Install the ca-certificates package +RUN apk add --no-cache ca-certificates + +# Copy a root ca into the image +COPY certificates/generated_assets/bacalhau_test_root_ca.crt /usr/local/share/ca-certificates/bacalhau_test_root_ca.crt + +# Update CA certificates +RUN update-ca-certificates + +# Download and execute the Bash script from the given URL +RUN curl -sSL https://get.bacalhau.org/install.sh | bash + +# Download the binary, make it executable, and move it to /usr/local/bin +RUN curl -o /tmp/mc https://dl.min.io/client/mc/release/linux-amd64/mc \ + && chmod +x /tmp/mc \ + && mv /tmp/mc /usr/local/bin/ + +ENTRYPOINT ["dockerd-entrypoint.sh"] 
diff --git a/test-integration/Dockerfile-ComputeNode b/test-integration/Dockerfile-ComputeNode new file mode 100644 index 0000000000..7a6cc4ebaa --- /dev/null +++ b/test-integration/Dockerfile-ComputeNode @@ -0,0 +1,24 @@ +# Use the docker:dind image as the base image +FROM docker:dind + +# Set the working directory +WORKDIR /app + +# Install curl and bash +RUN apk update && apk add --no-cache curl bash + +# Install the ca-certificates package +RUN apk add --no-cache ca-certificates + +# Copy a root ca into the image +COPY certificates/generated_assets/bacalhau_test_root_ca.crt /usr/local/share/ca-certificates/bacalhau_test_root_ca.crt + +# Update CA certificates +RUN update-ca-certificates + +# Download and execute the Bash script from the given URL +RUN curl -sSL https://get.bacalhau.org/install.sh | bash + +COPY compute_node_image_setup.sh compute_node_image_setup.sh +ENTRYPOINT ["/usr/bin/env"] +CMD ./compute_node_image_setup.sh diff --git a/test-integration/Dockerfile-DockerImageRegistryNode b/test-integration/Dockerfile-DockerImageRegistryNode new file mode 100644 index 0000000000..9c38ba886e --- /dev/null +++ b/test-integration/Dockerfile-DockerImageRegistryNode @@ -0,0 +1,24 @@ +FROM registry:2 + +# Install curl and bash +RUN apk update && apk add --no-cache curl bash + +# Install the ca-certificates package +RUN apk add --no-cache ca-certificates + +# Copy a root ca into the image +COPY certificates/generated_assets/bacalhau_test_root_ca.crt /usr/local/share/ca-certificates/bacalhau_test_root_ca.crt + +# Create a directory to store certificates to be used by the registry +RUN mkdir /certs + +# Copy the certificate and key from the local directory to /certs +COPY certificates/generated_assets/bacalhau-container-img-registry-node.crt /certs/ +COPY certificates/generated_assets/bacalhau-container-img-registry-node.key /certs/ + +# Ensure proper permissions for certs +RUN chmod 600 /certs/bacalhau-container-img-registry-node.key +RUN chmod 644 
/certs/bacalhau-container-img-registry-node.crt + +# Expose the registry's default port +EXPOSE 5000 443 diff --git a/test-integration/Dockerfile-RequesterNode b/test-integration/Dockerfile-RequesterNode new file mode 100644 index 0000000000..cbbd207c32 --- /dev/null +++ b/test-integration/Dockerfile-RequesterNode @@ -0,0 +1,22 @@ +# Use the docker:dind image as the base image +FROM docker:dind + +# Set the working directory +WORKDIR /app + +# Install curl and bash +RUN apk update && apk add --no-cache curl bash + +# Install the ca-certificates package +RUN apk add --no-cache ca-certificates + +# Copy a root ca into the image +COPY certificates/generated_assets/bacalhau_test_root_ca.crt /usr/local/share/ca-certificates/bacalhau_test_root_ca.crt + +# Update CA certificates +RUN update-ca-certificates + +# Download and execute the Bash script from the given URL +RUN curl -sSL https://get.bacalhau.org/install.sh | bash + +ENTRYPOINT ["dockerd-entrypoint.sh"] diff --git a/test-integration/README.md b/test-integration/README.md new file mode 100644 index 0000000000..bada3d6e3c --- /dev/null +++ b/test-integration/README.md @@ -0,0 +1,198 @@ +# Running Bacalhau on Docker + +## Overview + +Since Bacalhau is a distributed system with multiple components, it is critical to have a reliable method for end-to-end testing. Additionally, it's important that these tests closely resemble a real production environment without relying on mocks. + +This setup addresses those needs by running Bacalhau inside containers while also supporting Docker workloads within these containers (using Docker-in-Docker, or DinD). + +## Architecture + +- A Requester Docker container, running Bacalhau as a requester node. +- A Compute Docker container, running Bacalhau as a compute node and is configured to run Docker containers inside it. +- A Bacalhau Client Docker container to act as a jumpbox to interact with this Bacalhau deployment. 
+- A [Registry](https://github.com/distribution/distribution/) Docker container to act as the local container image registry. +- A Minio Docker container to support running S3 compatible input/output jobs. +- Docker Compose is used to create 5 services: the Requester Node, the Compute Node, the Client CLI Node, the registry node, and the Minio node. +- All the services are connected on the same Docker network, allowing them to communicate over the bridged network. +- All the containers have an injected custom Certificate Authority, which is used for a portion of the internal TLS communication. + - TODO: Expand the TLS setup to more components. Currently it is used only for the registry communication. + +## Setup + +--- +### Build the Docker Images + +Build the Requester Node image: +```shell +docker build -f Dockerfile-RequesterNode -t bacalhau-requester-node-image . +``` + +Build the Compute Node image: +```shell +docker build -f Dockerfile-ComputeNode -t bacalhau-compute-node-image . +``` + +Build the Client Node image: +```shell +docker build -f Dockerfile-ClientNode -t bacalhau-client-node-image . +``` + +Build the Registry Node image: +```shell +docker build -f Dockerfile-DockerImageRegistryNode -t bacalhau-container-img-registry-node-image . +``` + +After running these commands, you should see the above images created: +```shell +docker image ls +``` +--- +### Running the setup + +Run Docker Compose: +```shell +docker-compose up +``` + +Access the utility client container to use the Bacalhau CLI: +```shell +docker exec -it bacalhau-client-node-container /bin/bash +``` + +Once inside the container, you can run the following commands to verify the setup: +```shell +# You should see two nodes: a Requester and a Compute Node +bacalhau node list +``` + +Run a test workload +```shell +bacalhau docker run hello-world + +# Describe the job; it should have completed successfully. +bacalhau job describe ........
+``` + +In another terminal window, you can follow the logs of the Requester node and the Compute node +```shell +docker logs bacalhau-requester-node-container -f +docker logs bacalhau-compute-node-container -f +``` + +--- +### Setting Up Minio + +Access the utility client container to use the Bacalhau CLI: +```shell +docker exec -it bacalhau-client-node-container /bin/bash +``` + +Set up an alias for the Minio CLI +```shell +# The environment variables are already injected in +# the container, no need to replace them yourself. +mc alias set bacalhau-minio "http://${BACALHAU_MINIO_NODE_HOST}:9000" "${MINIO_ROOT_USER}" "${MINIO_ROOT_PASSWORD}" +mc admin info bacalhau-minio +``` + +Create a bucket and add some files +```shell +mc mb bacalhau-minio/my-data-bucket +mc ls bacalhau-minio/my-data-bucket/section1/ +echo "This is a sample text hello hello." > example.txt +mc cp example.txt bacalhau-minio/my-data-bucket/section1/ +``` + +Run a job with data input from the Minio bucket + +```shell +# Content of aws-test-job.yaml below +bacalhau job run aws-test-job.yaml +``` + +```yaml +Name: S3 Job Data Access Test +Type: batch +Count: 1 +Tasks: + - Name: main + Engine: + Type: docker + Params: + Image: ubuntu:latest + Entrypoint: + - /bin/bash + Parameters: + - "-c" + - "cat /put-my-s3-data-here/example.txt" + InputSources: + - Target: "/put-my-s3-data-here" + Source: + Type: s3 + Params: + Bucket: "my-data-bucket" + Key: "section1/" + Endpoint: "http://bacalhau-minio-node:9000" + Region: "us-east-1" # If no region is specified, the job fails, even for Minio +``` + +--- +### Setting Up private registry + +This docker compose deployment has a private registry deployed on its own node. It allows us to +create tests and experiment with Docker image jobs without needing to use DockerHub in any way. + +From inside the client container, let's pull an image from DockerHub, push it to our own private registry, +then run a docker job running the image in our private registry.
+ +```shell +# pull from docker hub +docker pull ubuntu + +# tag the image to prepare it to be pushed to our private registry +docker image tag ubuntu bacalhau-container-img-registry-node:5000/firstbacalhauimage + +# push the image to our private registry +docker push bacalhau-container-img-registry-node:5000/firstbacalhauimage +``` + +Now, let's create a job that references that image in the private registry: + +```shell +# Content of private-registry-test-job.yaml below +bacalhau job run private-registry-test-job.yaml +``` + +```yaml +Name: Job to test using local registry images +Type: batch +Count: 1 +Tasks: + - Name: main + Engine: + Type: docker + Params: + Image: bacalhau-container-img-registry-node:5000/firstbacalhauimage + Entrypoint: + - /bin/bash + Parameters: + - "-c" + - "echo test-local-registry" +``` + +--- +### Notes: + +If, after running `docker-compose up`, the image registry node fails to start, try removing the image registry Docker volume by running: + +```shell +# Destroy the deployment +docker-compose down + +# Remove registry volume +docker volume rm test-integration_registry-volume + +# Create deployment again +docker-compose up +``` diff --git a/test-integration/certificates/README.md b/test-integration/certificates/README.md new file mode 100644 index 0000000000..f993908841 --- /dev/null +++ b/test-integration/certificates/README.md @@ -0,0 +1,9 @@ +# Certificate Generation + +The script in this folder lets you generate certificates that are signed by a root CA, and provide the +CN and SAN for these leaf certs. The generated certs will be added to the `generated_assets` directory.
+ +Usage: `./generate_leaf_certs.sh ` +```shell +./generate_leaf_certs.sh my-bacalhau-requester-node +``` diff --git a/test-integration/certificates/generate_leaf_certs.sh b/test-integration/certificates/generate_leaf_certs.sh new file mode 100755 index 0000000000..0411adc9d3 --- /dev/null +++ b/test-integration/certificates/generate_leaf_certs.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +# Set variables +ROOT_CA_CERT="generated_assets/bacalhau_test_root_ca.crt" +ROOT_CA_KEY="generated_assets/bacalhau_test_root_ca.key" +DAYS_VALID=1825 # 5 years + +# Organization name and country (same as before) +ORG_NAME="Bacalhau" +COUNTRY="US" + +# Check if the input argument is provided +if [[ -z "$1" ]]; then + echo "Error: Please provide a string for the Common Name and Subject Alternative Names." + exit 1 +fi + +COMMON_NAME="$1" +OUTPUT_CERT="generated_assets/${COMMON_NAME}.crt" +OUTPUT_KEY="generated_assets/${COMMON_NAME}.key" +CSR_PATH="generated_assets/${COMMON_NAME}.csr" +CNF_PATH="generated_assets/${COMMON_NAME}.cnf" + +# Check if the files already exist +if [[ -f "${OUTPUT_CERT}" ]] || [[ -f "${OUTPUT_KEY}" ]]; then + echo "Error: One or both of the following files already exist:" + [[ -f "${OUTPUT_CERT}" ]] && echo " - ${OUTPUT_CERT}" + [[ -f "${OUTPUT_KEY}" ]] && echo " - ${OUTPUT_KEY}" + echo "Please remove or rename the existing files before running this script." + exit 1 +fi + +# Generate a private key for the new certificate +echo "Generating certificate signed by the root CA..." +openssl genpkey -algorithm RSA -out "${OUTPUT_KEY}" -pkeyopt rsa_keygen_bits:4096 + +# Create an OpenSSL configuration file for the SAN +cat > "${CNF_PATH}" </dev/null 2>&1; then + echo "dockerd is available! 
Now Starting Bacalhau as a compute node" + bacalhau config set compute.auth.token="${NETWORK_AUTH_TOKEN}" + bacalhau serve --compute -c compute.orchestrators="nats://${REQUESTER_NODE_LINK}:4222" + # Wait for any process to exit + wait -n + + # Exit with status of process that exited first + exit $? + fi + + # Wait before retrying + echo "dockerd is not available yet. Retrying in ${RETRY_INTERVAL} seconds..." + sleep "${RETRY_INTERVAL}" + + # Increment attempt counter + attempt=$((attempt + 1)) +done + +echo "dockerd did not become available within ${TOTAL_WAIT_TIME_FOR_DOCKERD} seconds." +exit 1 diff --git a/test-integration/docker-compose.yml b/test-integration/docker-compose.yml new file mode 100644 index 0000000000..2340fba1a6 --- /dev/null +++ b/test-integration/docker-compose.yml @@ -0,0 +1,117 @@ +x-common-env-variables: &common-env-variables + NETWORK_AUTH_TOKEN: "i_am_very_secret_token" + BACALHAU_API_PORT: "1234" + MINIO_ROOT_USER: "minioadmin" + MINIO_ROOT_PASSWORD: "minioadminpass" + AWS_ACCESS_KEY_ID: "minioadmin" + AWS_SECRET_ACCESS_KEY: "minioadminpass" + +networks: + bacalhau-network: + driver: bridge + +volumes: + minio-volume: + driver: local + registry-volume: + driver: local + +services: + bacalhau-minio-node: + image: quay.io/minio/minio + container_name: bacalhau-minio-node-container + command: server /data --console-address ":9001" + volumes: + - minio-volume:/data + restart: always + networks: + - bacalhau-network + environment: *common-env-variables + healthcheck: + test: [ "CMD", "curl", "-f", "http://localhost:9000/minio/health/live" ] + interval: 1s + timeout: 5s + retries: 30 + start_period: 2s + + bacalhau-container-img-registry-node: + image: bacalhau-container-img-registry-node-image + container_name: bacalhau-container-img-registry-container + volumes: + - registry-volume:/var/lib/registry + restart: always + networks: + - bacalhau-network + environment: + REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY: /var/lib/registry + 
REGISTRY_HTTP_ADDR: "0.0.0.0:5000" + REGISTRY_HTTP_TLS_CERTIFICATE: "/certs/bacalhau-container-img-registry-node.crt" + REGISTRY_HTTP_TLS_KEY: "/certs/bacalhau-container-img-registry-node.key" + healthcheck: + test: [ "CMD-SHELL", "nc -zv localhost 5000" ] + interval: 1s + timeout: 5s + retries: 30 + start_period: 2s + + bacalhau-requester-node: + image: bacalhau-requester-node-image + container_name: bacalhau-requester-node-container + networks: + - bacalhau-network + environment: *common-env-variables + depends_on: + bacalhau-minio-node: + condition: service_healthy + privileged: true + command: + - /bin/bash + - -c + - | + bacalhau config set "orchestrator.auth.token" "$${NETWORK_AUTH_TOKEN}" && bacalhau serve --orchestrator -c api.port=$${BACALHAU_API_PORT} + healthcheck: + test: [ "CMD-SHELL", "nc -zv localhost 1234" ] + interval: 1s + timeout: 5s + retries: 30 + start_period: 2s + + bacalhau-compute-node: + image: bacalhau-compute-node-image + container_name: bacalhau-compute-node-container + privileged: true + networks: + - bacalhau-network + depends_on: + bacalhau-requester-node: + condition: service_healthy + bacalhau-container-img-registry-node: + condition: service_healthy + environment: + <<: *common-env-variables + REQUESTER_NODE_LINK: 'bacalhau-requester-node' + healthcheck: + test: [ "CMD-SHELL", "nc -zv localhost 1234" ] + interval: 1s + timeout: 5s + retries: 30 + start_period: 2s + + bacalhau-client-node: + image: bacalhau-client-node-image + container_name: bacalhau-client-node-container + privileged: true + networks: + - bacalhau-network + depends_on: + bacalhau-requester-node: + condition: service_healthy + bacalhau-compute-node: + condition: service_healthy + bacalhau-container-img-registry-node: + condition: service_healthy + environment: + <<: *common-env-variables + BACALHAU_API_HOST: 'bacalhau-requester-node' + BACALHAU_COMPUTE_NODE_HOST: 'bacalhau-compute-node' + BACALHAU_MINIO_NODE_HOST: 'bacalhau-minio-node'