
Docker Scaffolding #3

Merged · 12 commits · Jul 30, 2024
8 changes: 8 additions & 0 deletions .env.TEMPLATE
@@ -0,0 +1,8 @@
DEFAULT_ENV=dev

# if 0, doesn't open a browser to the frontend webapp on a normal stack launch
DO_OPEN_BROWSER=1

POSTGRES_USER=molevolvr
POSTGRES_PASSWORD=
POSTGRES_DB=molevolvr
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
/.env
44 changes: 44 additions & 0 deletions README.md
@@ -0,0 +1,44 @@
# MolEvolvR Stack

This repo contains the implementation of the MolEvolvR stack, i.e.:
- `app`: the frontend webapp, written in React
- `backend`: a backend written in [Plumber](https://www.rplumber.io/index.html)
- `cluster`: the containerized SLURM "cluster" on which jobs are run
- `services`: a collection of services on which the stack relies:
- `postgres`: configuration for a PostgreSQL database, which stores job information

Most of the data processing is accomplished via the `MolEvolvR` package, which
is currently available at https://github.com/JRaviLab/molevol_scripts. The stack
simply provides a user-friendly interface for accepting and monitoring the
progress of jobs, and orchestrates running the jobs on SLURM. The jobs
themselves call methods of the package at each stage of processing.

## Running the Stack in Development

To run the stack, you'll need to [install Docker and Docker Compose](https://www.docker.com/).

First, copy `.env.TEMPLATE` to `.env` and fill in the necessary values. You
should supply a random password for the `POSTGRES_PASSWORD` variable. Of note
is the `DEFAULT_ENV` variable, which gives `run_stack.sh` a default environment
in which to operate; in development, this should be set to `dev`.
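For instance, one way to fill in the password (a sketch, assuming `openssl` is available on your machine — any other source of randomness works just as well):

```bash
# from the repo root: copy the template, then generate a random password
cp .env.TEMPLATE .env 2>/dev/null || true  # (guarded no-op if the template is absent)
PW=$(openssl rand -hex 16)                 # 32 hex characters of randomness
echo "POSTGRES_PASSWORD=${PW}"
```

Paste the printed value into the `POSTGRES_PASSWORD=` line of your `.env`.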

Then, you can run the following command to bring up the stack:

```bash
./run_stack.sh
```

This will start the stack in development mode, which automatically reloads the
backend or frontend when there are changes to their source.

You should then be able to access the frontend at `http://localhost:5713` (the
port configured in `app/Dockerfile` and `docker-compose.override.yml`).

## Production

To run the stack in production, run the following command:

```bash
./run_stack.sh prod
```

This will start the stack in production mode: the frontend is built into a
static bundle, which Caddy then serves on ports 80/443 (see
`docker-compose.prod.yml`).
1 change: 1 addition & 0 deletions app/.dockerignore
@@ -0,0 +1 @@
node_modules
50 changes: 50 additions & 0 deletions app/Dockerfile
@@ -0,0 +1,50 @@
# from https://bun.sh/guides/ecosystem/docker, with modifications
# to run a hot-reloading development server

# use the official Bun image
# see all versions at https://hub.docker.com/r/oven/bun/tags
FROM oven/bun:1 AS base
WORKDIR /app


# -----------------------------------------------------------
# install dependencies for dev and prod into temp directories
FROM base AS install

COPY package.json bun.lockb /temp/dev/
RUN cd /temp/dev/ && \
    bun install --frozen-lockfile

# FA: the production-only install is currently commented out since we always
# require the dev dependencies, specifically vite, to run *or* build the app.
# i'm leaving it here because perhaps someday we'll think of a reason why we
# want just the production dependencies.

# # install with --production (exclude devDependencies)
# COPY package.json bun.lockb /temp/prod/
# RUN cd /temp/prod && \
#     bun install --frozen-lockfile --production

# -----------------------------------------------------------
# copy node_modules from dev stage, copy entire app
# source into the image
FROM base AS dev
COPY --from=install /temp/dev/node_modules node_modules
COPY . .
# run the app in hot-reloading development mode
# set up vite to accept connections on any interface, e.g. from outside the
# container, and to always run on port 5713
CMD [ "vite", "--host", "--port", "5713" ]


# -----------------------------------------------------------
# copy production dependencies and source code into final image
FROM base AS release
COPY --from=install /temp/dev/node_modules node_modules
COPY . .

# produce a bundle that'll then be served via a reverse http proxy, e.g. nginx
# (you'll want /app/dist to be mapped to a volume that's served by the reverse
# http proxy)
CMD [ "vite", "build" ]

Binary file modified app/bun.lockb
Binary file not shown.
4 changes: 3 additions & 1 deletion app/package.json
@@ -18,6 +18,7 @@
"@radix-ui/react-slider": "^1.1.2",
"@radix-ui/react-tabs": "^1.0.4",
"@radix-ui/react-tooltip": "^1.0.7",
"@tanstack/react-query": "^5.36.2",
"@tanstack/react-table": "^8.15.3",
"classnames": "^2.5.1",
"csv-stringify": "^6.4.6",
@@ -34,7 +35,8 @@
"react-time-ago": "^7.3.1",
"react-to-text": "^2.0.1",
"react-use": "^17.5.0",
"use-debounce": "^10.0.0"
"use-debounce": "^10.0.0",
"use-query-params": "^2.2.1"
},
"devDependencies": {
"@ianvs/prettier-plugin-sort-imports": "^4.2.1",
42 changes: 42 additions & 0 deletions backend/docker/Dockerfile
@@ -0,0 +1,42 @@
# syntax=docker/dockerfile:1.7

# this Dockerfile should be used with the ./backend/ folder as the context
# and ./backend/docker/Dockerfile as the dockerfile

FROM rocker/tidyverse:4.3

# install ccache, a compiler cache
RUN apt-get update && apt-get install -y ccache

# install some common cmd line tools
RUN apt-get update && apt-get install -y curl

# acquire drip, a plumber auto-reloader, and install
ENV DRIP_URL="https://rdrip.netlify.app/builds/drip_0.1.0_linux_amd64.zip"
RUN mkdir -p /tmp/software/ && \
    wget -O /tmp/software/drip.zip ${DRIP_URL} && \
    unzip /tmp/software/drip.zip -d /tmp/software && \
    mv /tmp/software/drip /usr/local/bin && \
    chmod +x /usr/local/bin/drip

# acquire atlas, a schema manager
RUN curl -sSf https://atlasgo.sh | sh

# configure ccache env vars
ENV PATH="/usr/lib/ccache:${PATH}"
ENV CCACHE_DIR="/tmp/ccache"

# install dependencies into the image
COPY ./docker/install.R /tmp/install.r
RUN Rscript /tmp/install.r

# RUN --mount=type=cache,target=/usr/local/lib/R/site-library \
# Rscript /tmp/install.r

WORKDIR /app

# copy the app into the image
COPY . /app

# run the app
CMD ["/app/launch_api.sh"]
11 changes: 11 additions & 0 deletions backend/docker/install.R
@@ -0,0 +1,11 @@
# install packages depended on by the molevolvr API server
install.packages(
  c(
    "plumber",   # REST API framework
    "DBI",       # Database interface
    "RPostgres", # PostgreSQL-specific impl. for DBI
    "dbplyr",    # dplyr for databases
    "box"        # allows R files to be referenced as modules
  ),
  Ncpus = 6
)
7 changes: 7 additions & 0 deletions backend/dummy.R
@@ -0,0 +1,7 @@
# Load the plumber package
library(plumber)

#* @get /
function() {
  "Hello, world!"
}
25 changes: 25 additions & 0 deletions backend/entrypoint.R
@@ -0,0 +1,25 @@
options(box.path = "/app")

box::use(
  plumber[plumb],
  server/tcp_utils[wait_for_port]
)

# receive the target port as the env var API_PORT, or 9050 if unspecified
target_port <- as.integer(Sys.getenv("API_PORT", unset = 9050))

# workaround for https://github.com/siegerts/drip/issues/3, in which
# reloading fails due to the port being in use. we just wait, polling
# occasionally, for up to 60 seconds for the port to become free.
if (wait_for_port(target_port, poll_interval = 1, verbose = TRUE)) {
  pr <- plumb("./dummy.R")$run(
    host = "0.0.0.0",
    port = target_port,
    debug = TRUE
  )
} else {
  stop(
    paste0(
      "Failed to start the API server; port ", target_port,
      " still occupied after wait timeout exceeded"
    )
  )
}
4 changes: 4 additions & 0 deletions backend/launch_api.sh
@@ -0,0 +1,4 @@
#!/bin/bash

# pass off to drip to control serving and reloading the API
drip
41 changes: 41 additions & 0 deletions backend/server/tcp_utils.R
@@ -0,0 +1,41 @@
#' Utility functions for working with TCP ports

#' Check if a port is in use
#' @param port The port to check
#' @param host The IP for which to check the port
#' @return TRUE if the port is in use, FALSE otherwise
is_port_in_use <- function(port, host = "127.0.0.1") {
  connection <- try(
    suppressWarnings(
      socketConnection(host = host, port = port, timeout = 1, open = "r+")
    ),
    silent = TRUE
  )
  if (inherits(connection, "try-error")) {
    return(FALSE) # port is not in use (nothing accepted the connection)
  } else {
    close(connection)
    return(TRUE) # port is in use
  }
}

#' Wait for a port to become free
#' @param port The port to wait for
#' @param timeout The maximum time to wait in seconds
#' @param poll_interval The interval between checks in seconds
#' @param host The IP for which to check the port
#' @param verbose Whether to print messages to the console
#' @return TRUE if the port is free, FALSE if the timeout is reached
wait_for_port <- function(port, timeout = 60, poll_interval = 5,
                          host = "127.0.0.1", verbose = TRUE) {
  start_time <- Sys.time()
  end_time <- start_time + timeout

  while (Sys.time() < end_time) {
    if (!is_port_in_use(port, host)) {
      if (verbose) { cat("Port", port, "is now free\n") }
      return(TRUE)
    }
    if (verbose) { cat("Port", port, "is in use. Checking again in", poll_interval, "seconds...\n") }
    Sys.sleep(poll_interval)
  }

  if (verbose) {
    cat(paste0("Timeout of ", timeout, "s reached, but port ", port, " is still in use, aborting\n"))
  }
  return(FALSE)
}
4 changes: 4 additions & 0 deletions cluster/README.md
@@ -0,0 +1,4 @@
# MolEvolvR Cluster

This folder will eventually contain Dockerfiles for building images for a SLURM
controller and worker nodes.
36 changes: 36 additions & 0 deletions docker-compose.override.yml
@@ -0,0 +1,36 @@
services:
  backend:
    volumes:
      - ./backend/:/app/
      # - ./backend/api/:/app/api/
      # - ./backend/schema/:/app/schema/
      # - ./backend/entrypoint.R:/app/entrypoint.R
      # - ./backend/run_tests.sh:/app/run_tests.sh
    ports:
      - "9050:9050"
    environment:
      - "POSTGRES_DEV_HOST=dev-db"
      - "PLUMBER_DEBUG=1"
    depends_on:
      - "dev-db"

  app:
    build:
      context: ./app
      target: dev
    volumes:
      - ./app/src:/app/src
    environment:
      - 'VITE_API=http://localhost:9050'
    ports:
      - "5713:5713"

  db:
    ports:
      - "5460:5432"

  # used by atlas to create migrations
  dev-db:
    image: postgres:16
    env_file:
      - .env
28 changes: 28 additions & 0 deletions docker-compose.prod.yml
@@ -0,0 +1,28 @@
volumes:
  app_bundle:
  caddy_data:
  caddy_config:

services:
  app:
    image: molevolvr-frontend
    build:
      context: ./app
      target: release
    volumes:
      - app_bundle:/app/dist
    depends_on:
      - backend

  caddy:
    image: caddy:2
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - app_bundle:/srv
      - ./services/caddy/Caddyfile:/etc/caddy/Caddyfile
      - caddy_data:/data
      - caddy_config:/config
    depends_on:
      - app
32 changes: 32 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,32 @@
services:
  backend:
    image: molevolvr-backend
    platform: linux/amd64
    build:
      context: ./backend
      dockerfile: ./docker/Dockerfile
    env_file:
      - .env
    environment:
      - API_PORT=9050
      - "POSTGRES_HOST=db"
    depends_on:
      db:
        condition: service_healthy

  app:
    image: molevolvr-frontend
    build: ./app
    depends_on:
      - backend

  db:
    image: postgres:16
    env_file:
      - .env
    healthcheck:
      test: ["CMD-SHELL", "sh -c 'pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}'"]
      interval: 30s
      timeout: 60s
      retries: 5
      start_period: 80s
201 changes: 201 additions & 0 deletions run_stack.sh
@@ -0,0 +1,201 @@
#!/usr/bin/env bash

# NOTES:
# -------
# This script launches the molevolvr stack in the specified target environment.
# It's invoked as ./run_stack.sh [target_env] [docker-compose args]; if
# [target_env] is not specified, it attempts to infer it from the host
# environment (the hostname, or the DEFAULT_ENV var sourced from .env), and
# aborts with an error message if it doesn't find a match. The remainder of
# the arguments are passed along to docker compose.
#
# for example, to launch the stack in the "prod" environment with the "up -d"
# command, you would run: ./run_stack.sh prod up -d
#
# the available environments differ in a variety of ways, including:
# - which services they run (prod runs 'nginx', for example, but the dev-y envs
# don't)
# - cores and memory constraints that are applied to the SLURM containers, in
# environments where the job scheduler is enabled
# - what external resources they mount as volumes into the container; for
# example, each environment mounts a different job results folder, but
# environments that process jobs use the same blast and iprscan folders, since
# they're gigantic
#
# these differences between environments are implemented by invoking different
# sets of docker-compose.yml files. with the exception of the "app" environment,
# the "root" compose file, docker-compose.yml, is always used first, and then
# depending on the environment other compose files are added in, which merge
# with the root compose configuration. since the app environment only runs the
# app, it has a separate compose file, docker-compose.apponly.yml, rather than
# merging with the root and killing nearly all the services except the app
# service.
#
# see the following for details on the semantics of merging compose files:
# https://docs.docker.com/compose/multiple-compose-files/merge/
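#
# as an illustration (a command for manual use; nothing in this script depends
# on it), you can preview the merged configuration that docker compose will
# resolve for the dev environment, without launching anything:
#   docker compose -f docker-compose.yml -f docker-compose.override.yml config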
#
# the current environments are as follows (contact FSA for details):
# - prod: the production environment, which runs the full stack, including the
# shiny app, the job scheduler, and the accounting database. it's the most
# resource-intensive environment, and is intended for use in production.
# - dev/staging: these are effectively dev environments that specific users run
# on the server for testing purposes.
# - app: a development environment that runs only the frontend and backend, and
# not the job scheduler or the accounting database. it's intended for use in
# frontend development, where you don't need to submit jobs or query the
# accounting database.


# if 1, skips invoking ./build_images.sh before running the stack
SKIP_BUILD=${SKIP_BUILD:-0}

# command to run after the stack has launched, e.g.
# in cases where you want to tail some containers after launch
# (by default, it does nothing)
POST_LAUNCH_CMD=":"
# if 1, clears the screen before running the post-launch command
DO_CLEAR="0"
# if 1, opens the browser window to the app after launching the stack
DO_OPEN_BROWSER=${DO_OPEN_BROWSER:-1}

# the URL to open when we invoke the browser
FRONTEND_URL=${FRONTEND_URL:-"http://localhost:5713"}

# helper function to print a message and exit with a specific code
# in one command
function fatal() {
    echo "${1:-fatal error, aborting}"
    exit ${2:-1}
}

# cross-platform helper function to open a browser window
function open_browser() {
    if [[ "$OSTYPE" == "linux-gnu"* ]]; then
        xdg-open "$1"
    elif [[ "$OSTYPE" == "darwin"* ]]; then
        open "$1"
    elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
        start "$1"
    else
        echo "WARNING: Unsupported OS: $OSTYPE, unable to open browser"
    fi
}

# ===========================================================================
# === entrypoint
# ===========================================================================

# source the .env file and export its contents
# among other things, we'll use the DEFAULT_ENV var in it to set the target env
set -a
source .env
set +a

# check if the first argument is a valid target env, and if not attempt
# to infer it from the script's parents' directory name
case $1 in
    "prod"|"staging"|"dev"|"app")
        TARGET_ENV=$1
        shift
        echo "* Selected target environment: ${TARGET_ENV}"
        ;;
    *)
        # attempt to resolve the target env from the host environment
        # (e.g., the hostname, possibly the repo root directory name, etc.)

        # get the name of the script's parent directory
        PARENT_DIR=$(basename $(dirname $(realpath $0)))
        HOSTNAME=$(hostname)

        # infer the env: prod if we're on the jravilab host, otherwise fall
        # back to DEFAULT_ENV from .env
        if [[ "${HOSTNAME}" = "jravilab" ]]; then
            TARGET_ENV="prod"
            STRATEGY="via hostname ${HOSTNAME}"
        elif [[ ! -z "${DEFAULT_ENV}" ]]; then
            TARGET_ENV="${DEFAULT_ENV}"
            STRATEGY="via DEFAULT_ENV"
        else
            echo -e \
                "ERROR: No valid target env specified, and could not infer" \
                "target environment from parent directory name:\n${PARENT_DIR}"
            exit 1
        fi

        echo "* Inferred target environment: ${TARGET_ENV} (${STRATEGY:-n/a})"
        ;;
esac

case ${TARGET_ENV} in
    "prod")
        DEFAULT_ARGS="up -d"
        COMPOSE_CMD="docker compose -f docker-compose.yml -f docker-compose.prod.yml"
        DO_CLEAR="0"
        # never launch the browser in production
        DO_OPEN_BROWSER=0
        # watch the logs after, since we detached after bringing up the stack
        POST_LAUNCH_CMD="${COMPOSE_CMD} logs -f"
        ;;
    "dev")
        DEFAULT_ARGS="up -d"
        COMPOSE_CMD="docker compose -f docker-compose.yml -f docker-compose.override.yml"
        DO_CLEAR="1"
        # watch the logs after, since we detached after bringing up the stack
        POST_LAUNCH_CMD="${COMPOSE_CMD} logs -f"
        ;;
    "app")
        # launches just the services necessary to run the shiny app, for
        # frontend development. note that you won't be able to submit jobs or
        # query the accounting database.
        DEFAULT_ARGS="up"
        COMPOSE_CMD="docker compose -f docker-compose.apponly.yml"
        DO_CLEAR="1"
        SKIP_BUILD="1" # don't build images for the app environment, since it uses so few of them
        # watch the logs after, since we detached after bringing up the stack
        # POST_LAUNCH_CMD="${COMPOSE_CMD} logs -f app"
        ;;
    *)
        echo "ERROR: Unknown target environment: ${TARGET_ENV}"
        exit 1
        ;;
esac

# ensure that docker compose can see the target env, so it can, e.g., namespace hosts to their environment
export TARGET_ENV=${TARGET_ENV}

# if any arguments were specified after the target env, use those instead of the default
if [ $# -gt 0 ]; then
    DEFAULT_ARGS="$@"
    DO_CLEAR="0" # don't clear so we can see the output
fi

# check if a "control" command is the current first argument; if so, skip the build
if [[ "$1" =~ ^(down|restart|logs)$ ]]; then
    echo "* Skipping build, since we're running a control command: $1"
    SKIP_BUILD=1
    # also skip the post-launch command so we don't get stuck, e.g., tailing
    POST_LAUNCH_CMD=""
    # also skip opening a browser window
    DO_OPEN_BROWSER=0
fi

# if SKIP_BUILD is 0 and 'down' isn't the docker compose command, build images
# for the target env.
# each built image is tagged with its target env, so they don't collide with
# each other; in the case of prod, the tag is "latest".
if [ "${SKIP_BUILD}" -eq 0 ]; then
    if [ "${TARGET_ENV}" == "prod" ] || [ "${TARGET_ENV}" == "app" ]; then
        IMAGE_TAG="latest"
    else
        IMAGE_TAG="${TARGET_ENV}"
    fi

    echo "* Building images for ${TARGET_ENV} (tag: ${IMAGE_TAG})"
    # ./build_images.sh ${IMAGE_TAG} || fatal "Failed to build images for ${TARGET_ENV}"
    ${COMPOSE_CMD} build || fatal "Failed to build images for ${TARGET_ENV}"
fi

echo "Running: ${COMPOSE_CMD} ${DEFAULT_ARGS}"
${COMPOSE_CMD} ${DEFAULT_ARGS} && \
    ( [[ ${DO_CLEAR} = "1" ]] && clear || exit 0 ) && \
    (
        [[ ${DO_OPEN_BROWSER} = "1" ]] \
            && open_browser "${FRONTEND_URL}" \
            || exit 0
    ) && \
    ${POST_LAUNCH_CMD}
5 changes: 5 additions & 0 deletions services/caddy/Caddyfile
@@ -0,0 +1,5 @@
# serve /srv
:80 {
    root * /srv
    file_server
}
9 changes: 9 additions & 0 deletions services/postgres/README.md
@@ -0,0 +1,9 @@
# PostgreSQL Configuration

This folder will eventually contain configuration for the PostgreSQL instance
that runs within the MolEvolvR stack.

The instance is responsible for:
- keeping records of analysis submissions
- tracking job status between the backend and SLURM controller
- recording any artifacts that aren't better stored on the filesystem.
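
As a purely hypothetical sketch of what such records might look like — no
schema exists in this PR, and every table and column name below is invented
for illustration:

```sql
-- hypothetical: a minimal submissions table tracking job status
CREATE TABLE submissions (
    id           SERIAL PRIMARY KEY,
    submitted    TIMESTAMPTZ NOT NULL DEFAULT now(),
    status       TEXT NOT NULL DEFAULT 'queued', -- e.g. queued/running/done/failed
    slurm_job_id INTEGER,                        -- id assigned by the SLURM controller
    results_path TEXT                            -- pointer to artifacts kept on the filesystem
);
```

The actual schema will be managed by atlas (installed in the backend image),
so whatever shape it takes will live in migration files rather than ad-hoc DDL.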