diff --git a/ops/terraform/dev.tfvars b/ops/terraform/dev.tfvars index 3858f58fb0..235014e11d 100644 --- a/ops/terraform/dev.tfvars +++ b/ops/terraform/dev.tfvars @@ -1,5 +1,5 @@ -bacalhau_version = "v1.1.3" -bacalhau_branch = "" +bacalhau_version = "" +bacalhau_branch = "nats" bacalhau_port = "1235" bacalhau_node_id_0 = "QmfYBQ3HouX9zKcANNXbgJnpyLpTYS9nKBANw6RUQKZffu" bacalhau_node_id_1 = "QmNjEQByyK8GiMTvnZqGyURuwXDCtzp9X6gJRKkpWfai7S" @@ -28,4 +28,5 @@ public_ip_addresses = ["34.86.177.175", "35.245.221.171"] num_gpu_machines = 0 log_level = "debug" otel_collector_version = "0.70.0" -otel_collector_endpoint = "http://localhost:4318" \ No newline at end of file +otel_collector_endpoint = "http://localhost:4318" +use_nats = true \ No newline at end of file diff --git a/ops/terraform/main.tf b/ops/terraform/main.tf index ea40f96230..2b383f4197 100644 --- a/ops/terraform/main.tf +++ b/ops/terraform/main.tf @@ -71,6 +71,10 @@ export GRAFANA_CLOUD_TEMPO_ENDPOINT="${var.grafana_cloud_tempo_endpoint}" export OTEL_COLLECTOR_VERSION="${var.otel_collector_version}" export OTEL_EXPORTER_OTLP_ENDPOINT="${var.otel_collector_endpoint}" export OTEL_RESOURCE_ATTRIBUTES="deployment.environment=${terraform.workspace}" +export BACALHAU_NODE_NETWORK_USENATS=${var.use_nats} +export BACALHAU_NODE_NETWORK_ORCHESTRATORS="${var.internal_ip_addresses[0]}:4222" +export BACALHAU_NODE_NETWORK_ADVERTISEDADDRESS="${var.public_ip_addresses[count.index]}:4222" +export BACALHAU_NODE_NETWORK_CLUSTER_PEERS="${var.internal_ip_addresses[0]}:6222" ### secrets are installed in the install-node.sh script export SECRETS_GRAFANA_CLOUD_PROMETHEUS_API_KEY="${var.grafana_cloud_prometheus_api_key}" @@ -295,6 +299,8 @@ resource "google_compute_firewall" "bacalhau_ingress_firewall" { "55679", // otel collector zpages extension "44443", // nginx is healthy - for running health check scripts "44444", // nginx node health check scripts + "4222", // nats + "6222", // nats cluster ] } @@ -320,6 +326,8 @@ resource 
"google_compute_firewall" "bacalhau_egress_firewall" { ports = [ "4001", // ipfs swarm "1235", // bacalhau swarm + "4222", // nats + "6222", // nats cluster ] } diff --git a/ops/terraform/remote_files/scripts/install-node.sh b/ops/terraform/remote_files/scripts/install-node.sh index fec079b7f3..8f6deab21f 100644 --- a/ops/terraform/remote_files/scripts/install-node.sh +++ b/ops/terraform/remote_files/scripts/install-node.sh @@ -117,12 +117,14 @@ function install-bacalhau-from-release() { function install-bacalhau-from-source() { echo "Installing Bacalhau from branch ${BACALHAU_BRANCH}" - sudo apt-get -y install --no-install-recommends jq nodejs npm make + # make sure we have the desired version of nodejs to build webui + curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash - + sudo apt-get -y install --no-install-recommends jq nodejs make git clone --branch ${BACALHAU_BRANCH} https://github.com/bacalhau-project/bacalhau.git pushd bacalhau pushd webui && npm install && popd make build-bacalhau - sudo mv ./bin/*/bacalhau /usr/local/bin/bacalhau + sudo mv ./bin/*/*/bacalhau /usr/local/bin/bacalhau popd } diff --git a/ops/terraform/remote_files/scripts/start-bacalhau.sh b/ops/terraform/remote_files/scripts/start-bacalhau.sh index 63a0d49288..e23cfccc64 100644 --- a/ops/terraform/remote_files/scripts/start-bacalhau.sh +++ b/ops/terraform/remote_files/scripts/start-bacalhau.sh @@ -20,60 +20,77 @@ mount /dev/sdb /data || true # import the secrets source /data/secrets.sh -function getMultiaddress() { - echo -n "/ip4/${1}/tcp/${BACALHAU_PORT}/p2p/${2}" -} - -# we start with none as the default ("none" prevents the node connecting to our default bootstrap list) -export CONNECT_PEER="none" - -# use the BACALHAU_CONNECT_PEER env var if it is set -if [[ -n "${BACALHAU_CONNECT_PEER}" ]]; then - export CONNECT_PEER=$BACALHAU_CONNECT_PEER -# if we are node0 then we do not connect to anything -elif [[ "${TERRAFORM_NODE_INDEX}" != "0" ]]; then - # if we are in unsafe mode 
- then we connect to a single node and it's ID - # is pre-determined by the $BACALHAU_NODE0_UNSAFE_ID variable - if [[ -n "${BACALHAU_UNSAFE_CLUSTER}" ]]; then - export UNSAFE_NODE0_ID="$BACALHAU_NODE_ID_0" - if [[ -z "$UNSAFE_NODE0_ID" ]]; then - export UNSAFE_NODE0_ID="$BACALHAU_NODE0_UNSAFE_ID" - fi - export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$UNSAFE_NODE0_ID") - # otherwise we will construct our connect string based on - # what node index we are - else - # we are > node0 so we can connect to node0 - export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$BACALHAU_NODE_ID_0") - # we are > node1 so we can also connect to node1 - if [[ "${TERRAFORM_NODE_INDEX}" -ge "2" ]]; then - export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE1_IP" "$BACALHAU_NODE_ID_1")" - fi - # we are > node2 so we can also connect to node2 - if [[ "${TERRAFORM_NODE_INDEX}" -ge "3" ]]; then - export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE2_IP" "$BACALHAU_NODE_ID_2")" - fi - fi -fi - BACALHAU_PROBE_EXEC='/terraform_node/apply-http-allowlist.sh' - TRUSTED_CLIENT_IDS="\ 1df7b01ed77ca81bb6d6f06f6cbcd76a6a9e450d175dfac1e4ba70494fddd576,\ b43517b5449d383ab00ca1d2b1c558d710ba79f51c800fbf4c35ed4d0198aec5" -bacalhau serve \ - --node-type "${BACALHAU_NODE_TYPE}" \ - --job-selection-data-locality anywhere \ - --job-selection-accept-networked \ - --job-selection-probe-exec "${BACALHAU_PROBE_EXEC}" \ - --max-job-execution-timeout '60m' \ - --job-execution-timeout-bypass-client-id="${TRUSTED_CLIENT_IDS}" \ - --ipfs-swarm-addrs "" \ - --ipfs-connect /ip4/127.0.0.1/tcp/5001 \ - --swarm-port "${BACALHAU_PORT}" \ - --api-port 1234 \ - --peer "${CONNECT_PEER}" \ - --private-internal-ipfs=false \ - --web-ui "${BACALHAU_NODE_WEBUI}" \ - --labels owner=bacalhau +# Check if using NATS +if [[ "${BACALHAU_NODE_NETWORK_USENATS}" == "true" ]]; then + # NATS-related config is set as env vars in main.tf, so there is no need to pass it to the serve command +
bacalhau serve \ + --node-type "${BACALHAU_NODE_TYPE}" \ + --job-selection-data-locality anywhere \ + --job-selection-accept-networked \ + --job-selection-probe-exec "${BACALHAU_PROBE_EXEC}" \ + --max-job-execution-timeout '60m' \ + --job-execution-timeout-bypass-client-id="${TRUSTED_CLIENT_IDS}" \ + --ipfs-swarm-addrs "" \ + --ipfs-connect /ip4/127.0.0.1/tcp/5001 \ + --api-port 1234 \ + --private-internal-ipfs=false \ + --web-ui "${BACALHAU_NODE_WEBUI}" \ + --web-ui-port 80 \ + --labels owner=bacalhau + +else + function getMultiaddress() { + echo -n "/ip4/${1}/tcp/${BACALHAU_PORT}/p2p/${2}" + } + + # use the BACALHAU_CONNECT_PEER env var if it is set + if [[ -n "${BACALHAU_CONNECT_PEER}" ]]; then + export CONNECT_PEER=$BACALHAU_CONNECT_PEER + # if we are node0 then we do not connect to anything + elif [[ "${TERRAFORM_NODE_INDEX}" != "0" ]]; then + # if we are in unsafe mode - then we connect to a single node and its ID + # is pre-determined by the $BACALHAU_NODE0_UNSAFE_ID variable + if [[ -n "${BACALHAU_UNSAFE_CLUSTER}" ]]; then + export UNSAFE_NODE0_ID="$BACALHAU_NODE_ID_0" + if [[ -z "$UNSAFE_NODE0_ID" ]]; then + export UNSAFE_NODE0_ID="$BACALHAU_NODE0_UNSAFE_ID" + fi + export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$UNSAFE_NODE0_ID") + # otherwise we will construct our connect string based on + # what node index we are + else + # we are > node0 so we can connect to node0 + export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$BACALHAU_NODE_ID_0") + # we are > node1 so we can also connect to node1 + if [[ "${TERRAFORM_NODE_INDEX}" -ge "2" ]]; then + export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE1_IP" "$BACALHAU_NODE_ID_1")" + fi + # we are > node2 so we can also connect to node2 + if [[ "${TERRAFORM_NODE_INDEX}" -ge "3" ]]; then + export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE2_IP" "$BACALHAU_NODE_ID_2")" + fi + fi + fi + + bacalhau serve \ + --node-type "${BACALHAU_NODE_TYPE}" \ +
--job-selection-data-locality anywhere \ + --job-selection-accept-networked \ + --job-selection-probe-exec "${BACALHAU_PROBE_EXEC}" \ + --max-job-execution-timeout '60m' \ + --job-execution-timeout-bypass-client-id="${TRUSTED_CLIENT_IDS}" \ + --ipfs-swarm-addrs "" \ + --ipfs-connect /ip4/127.0.0.1/tcp/5001 \ + --swarm-port "${BACALHAU_PORT}" \ + --api-port 1234 \ + --peer "${CONNECT_PEER}" \ + --private-internal-ipfs=false \ + --web-ui "${BACALHAU_NODE_WEBUI}" \ + --web-ui-port 80 \ + --labels owner=bacalhau +fi \ No newline at end of file diff --git a/ops/terraform/variables.tf b/ops/terraform/variables.tf index 9839690d78..6c4ad01a85 100644 --- a/ops/terraform/variables.tf +++ b/ops/terraform/variables.tf @@ -230,3 +230,9 @@ variable "docker_password" { default = "" sensitive = true } + +// Use NATS for transport instead of libp2p +variable "use_nats" { + type = bool + default = false +} \ No newline at end of file