Skip to content

Commit

Permalink
fix terraform build from source + support nats
Browse files Browse the repository at this point in the history
  • Loading branch information
wdbaruni committed Jan 7, 2024
1 parent 3dcec06 commit d15baac
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 57 deletions.
7 changes: 4 additions & 3 deletions ops/terraform/dev.tfvars
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
bacalhau_version = "v1.1.3"
bacalhau_branch = ""
bacalhau_version = ""
bacalhau_branch = "nats"
bacalhau_port = "1235"
bacalhau_node_id_0 = "QmfYBQ3HouX9zKcANNXbgJnpyLpTYS9nKBANw6RUQKZffu"
bacalhau_node_id_1 = "QmNjEQByyK8GiMTvnZqGyURuwXDCtzp9X6gJRKkpWfai7S"
Expand Down Expand Up @@ -28,4 +28,5 @@ public_ip_addresses = ["34.86.177.175", "35.245.221.171"]
num_gpu_machines = 0
log_level = "debug"
otel_collector_version = "0.70.0"
otel_collector_endpoint = "http://localhost:4318"
otel_collector_endpoint = "http://localhost:4318"
use_nats = true
8 changes: 8 additions & 0 deletions ops/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ export GRAFANA_CLOUD_TEMPO_ENDPOINT="${var.grafana_cloud_tempo_endpoint}"
export OTEL_COLLECTOR_VERSION="${var.otel_collector_version}"
export OTEL_EXPORTER_OTLP_ENDPOINT="${var.otel_collector_endpoint}"
export OTEL_RESOURCE_ATTRIBUTES="deployment.environment=${terraform.workspace}"
export BACALHAU_NODE_NETWORK_USENATS=${var.use_nats}
export BACALHAU_NODE_NETWORK_ORCHESTRATORS="${var.internal_ip_addresses[0]}:4222"
export BACALHAU_NODE_NETWORK_ADVERTISEDADDRESS="${var.public_ip_addresses[count.index]}:4222"
export BACALHAU_NODE_NETWORK_CLUSTER_PEERS="${var.internal_ip_addresses[0]}:6222"
### secrets are installed in the install-node.sh script
export SECRETS_GRAFANA_CLOUD_PROMETHEUS_API_KEY="${var.grafana_cloud_prometheus_api_key}"
Expand Down Expand Up @@ -295,6 +299,8 @@ resource "google_compute_firewall" "bacalhau_ingress_firewall" {
"55679", // otel collector zpages extension
"44443", // nginx is healthy - for running health check scripts
"44444", // nginx node health check scripts
"4222", // nats
"6222", // nats cluster
]
}

Expand All @@ -320,6 +326,8 @@ resource "google_compute_firewall" "bacalhau_egress_firewall" {
ports = [
"4001", // ipfs swarm
"1235", // bacalhau swarm
"4222", // nats
"6222", // nats cluster
]
}

Expand Down
6 changes: 4 additions & 2 deletions ops/terraform/remote_files/scripts/install-node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,14 @@ function install-bacalhau-from-release() {

# Build Bacalhau from the git branch named by BACALHAU_BRANCH and install the
# resulting binary as /usr/local/bin/bacalhau.
# Globals: BACALHAU_BRANCH (read) - branch passed to `git clone --branch`.
# NOTE(review): this listing looks like a rendered diff in which both the old
# and the new variant of the apt-get line and of the mv line are present;
# confirm against the real file which of each pair is current.
function install-bacalhau-from-source() {
echo "Installing Bacalhau from branch ${BACALHAU_BRANCH}"
sudo apt-get -y install --no-install-recommends jq nodejs npm make
# make sure we have the desired version of nodejs to build webui
# (runs the NodeSource 18.x setup script as root before installing nodejs)
curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
sudo apt-get -y install --no-install-recommends jq nodejs make
git clone --branch ${BACALHAU_BRANCH} https://github.com/bacalhau-project/bacalhau.git
pushd bacalhau
# install the web UI's npm dependencies before running the build
pushd webui && npm install && popd
make build-bacalhau
# presumably the build output lands in a per-platform directory under ./bin;
# the two globs differ only in directory depth - TODO confirm which matches
sudo mv ./bin/*/bacalhau /usr/local/bin/bacalhau
sudo mv ./bin/*/*/bacalhau /usr/local/bin/bacalhau
popd
}

Expand Down
121 changes: 69 additions & 52 deletions ops/terraform/remote_files/scripts/start-bacalhau.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,60 +20,77 @@ mount /dev/sdb /data || true
# import the secrets
source /data/secrets.sh

# Print the multiaddress string for a peer, without a trailing newline.
# Globals:   BACALHAU_PORT (read) - TCP port placed in the address
# Arguments: $1 - peer IPv4 address
#            $2 - peer ID
# Outputs:   /ip4/<ip>/tcp/<port>/p2p/<id> on stdout
function getMultiaddress() {
  local peer_ip="${1}"
  local peer_id="${2}"
  printf '%s' "/ip4/${peer_ip}/tcp/${BACALHAU_PORT}/p2p/${peer_id}"
}

# Work out which peers this node should dial and export the result as
# CONNECT_PEER (a comma-separated list of multiaddresses, or "none").
# we start with none as the default ("none" prevents the node connecting to our default bootstrap list)
export CONNECT_PEER="none"

# use the BACALHAU_CONNECT_PEER env var if it is set
if [[ -n "${BACALHAU_CONNECT_PEER}" ]]; then
export CONNECT_PEER=$BACALHAU_CONNECT_PEER
# if we are node0 then we do not connect to anything
elif [[ "${TERRAFORM_NODE_INDEX}" != "0" ]]; then
# if we are in unsafe mode - then we connect to a single node (node0); its ID
# is $BACALHAU_NODE_ID_0, falling back to $BACALHAU_NODE0_UNSAFE_ID when that is empty
if [[ -n "${BACALHAU_UNSAFE_CLUSTER}" ]]; then
export UNSAFE_NODE0_ID="$BACALHAU_NODE_ID_0"
if [[ -z "$UNSAFE_NODE0_ID" ]]; then
export UNSAFE_NODE0_ID="$BACALHAU_NODE0_UNSAFE_ID"
fi
export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$UNSAFE_NODE0_ID")
# otherwise we will construct our connect string based on
# what node index we are
else
# we are > node0 so we can connect to node0
export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$BACALHAU_NODE_ID_0")
# we are > node1 so we can also connect to node1
if [[ "${TERRAFORM_NODE_INDEX}" -ge "2" ]]; then
export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE1_IP" "$BACALHAU_NODE_ID_1")"
fi
# we are > node2 so we can also connect to node2
if [[ "${TERRAFORM_NODE_INDEX}" -ge "3" ]]; then
export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE2_IP" "$BACALHAU_NODE_ID_2")"
fi
fi
fi

# Script handed to --job-selection-probe-exec below.
BACALHAU_PROBE_EXEC='/terraform_node/apply-http-allowlist.sh'

# Comma-separated client IDs handed to --job-execution-timeout-bypass-client-id below.
TRUSTED_CLIENT_IDS="\
1df7b01ed77ca81bb6d6f06f6cbcd76a6a9e450d175dfac1e4ba70494fddd576,\
b43517b5449d383ab00ca1d2b1c558d710ba79f51c800fbf4c35ed4d0198aec5"

# Start the Bacalhau node in the foreground. The swarm port comes from
# BACALHAU_PORT and the peers to dial from CONNECT_PEER.
bacalhau serve \
--node-type "${BACALHAU_NODE_TYPE}" \
--job-selection-data-locality anywhere \
--job-selection-accept-networked \
--job-selection-probe-exec "${BACALHAU_PROBE_EXEC}" \
--max-job-execution-timeout '60m' \
--job-execution-timeout-bypass-client-id="${TRUSTED_CLIENT_IDS}" \
--ipfs-swarm-addrs "" \
--ipfs-connect /ip4/127.0.0.1/tcp/5001 \
--swarm-port "${BACALHAU_PORT}" \
--api-port 1234 \
--peer "${CONNECT_PEER}" \
--private-internal-ipfs=false \
--web-ui "${BACALHAU_NODE_WEBUI}" \
--labels owner=bacalhau
# Check if using NATS: BACALHAU_NODE_NETWORK_USENATS == "true" selects the NATS
# launch; any other value falls through to the libp2p launch, which needs a
# peer list and a swarm port.
if [[ "${BACALHAU_NODE_NETWORK_USENATS}" == "true" ]]; then
# NATS-related config is set as env vars in main.tf, so there is no need to pass it to the serve command
# (note: no --swarm-port and no --peer in this branch)
bacalhau serve \
--node-type "${BACALHAU_NODE_TYPE}" \
--job-selection-data-locality anywhere \
--job-selection-accept-networked \
--job-selection-probe-exec "${BACALHAU_PROBE_EXEC}" \
--max-job-execution-timeout '60m' \
--job-execution-timeout-bypass-client-id="${TRUSTED_CLIENT_IDS}" \
--ipfs-swarm-addrs "" \
--ipfs-connect /ip4/127.0.0.1/tcp/5001 \
--api-port 1234 \
--private-internal-ipfs=false \
--web-ui "${BACALHAU_NODE_WEBUI}" \
--web-ui-port 80 \
--labels owner=bacalhau

else
# Print /ip4/<$1>/tcp/<BACALHAU_PORT>/p2p/<$2> without a trailing newline.
function getMultiaddress() {
echo -n "/ip4/${1}/tcp/${BACALHAU_PORT}/p2p/${2}"
}

# use the BACALHAU_CONNECT_PEER env var if it is set
if [[ -n "${BACALHAU_CONNECT_PEER}" ]]; then
export CONNECT_PEER=$BACALHAU_CONNECT_PEER
# if we are node0 then we do not connect to anything
elif [[ "${TERRAFORM_NODE_INDEX}" != "0" ]]; then
# if we are in unsafe mode - then we connect to a single node (node0); its ID
# is $BACALHAU_NODE_ID_0, falling back to $BACALHAU_NODE0_UNSAFE_ID when that is empty
if [[ -n "${BACALHAU_UNSAFE_CLUSTER}" ]]; then
export UNSAFE_NODE0_ID="$BACALHAU_NODE_ID_0"
if [[ -z "$UNSAFE_NODE0_ID" ]]; then
export UNSAFE_NODE0_ID="$BACALHAU_NODE0_UNSAFE_ID"
fi
export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$UNSAFE_NODE0_ID")
# otherwise we will construct our connect string based on
# what node index we are
else
# we are > node0 so we can connect to node0
export CONNECT_PEER=$(getMultiaddress "$TERRAFORM_NODE0_IP" "$BACALHAU_NODE_ID_0")
# we are > node1 so we can also connect to node1
if [[ "${TERRAFORM_NODE_INDEX}" -ge "2" ]]; then
export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE1_IP" "$BACALHAU_NODE_ID_1")"
fi
# we are > node2 so we can also connect to node2
if [[ "${TERRAFORM_NODE_INDEX}" -ge "3" ]]; then
export CONNECT_PEER="$CONNECT_PEER,$(getMultiaddress "$TERRAFORM_NODE2_IP" "$BACALHAU_NODE_ID_2")"
fi
fi
fi

# libp2p launch: same flags as the NATS launch plus --swarm-port and --peer
bacalhau serve \
--node-type "${BACALHAU_NODE_TYPE}" \
--job-selection-data-locality anywhere \
--job-selection-accept-networked \
--job-selection-probe-exec "${BACALHAU_PROBE_EXEC}" \
--max-job-execution-timeout '60m' \
--job-execution-timeout-bypass-client-id="${TRUSTED_CLIENT_IDS}" \
--ipfs-swarm-addrs "" \
--ipfs-connect /ip4/127.0.0.1/tcp/5001 \
--swarm-port "${BACALHAU_PORT}" \
--api-port 1234 \
--peer "${CONNECT_PEER}" \
--private-internal-ipfs=false \
--web-ui "${BACALHAU_NODE_WEBUI}" \
--web-ui-port 80 \
--labels owner=bacalhau
fi
6 changes: 6 additions & 0 deletions ops/terraform/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,9 @@ variable "docker_password" {
default = ""
sensitive = true
}

// Use NATS for transport instead of libp2p.
// Defaults to false, so NATS is only used when a tfvars file sets use_nats = true.
variable "use_nats" {
type = bool
default = false
}

0 comments on commit d15baac

Please sign in to comment.