Merge branch 'konturio-master'
dakotabenjamin committed Jan 21, 2025
2 parents 0e31879 + 8038b1c commit c8c29a3
Showing 7 changed files with 152 additions and 64 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/img_deploy.yml
@@ -18,7 +18,7 @@ jobs:
uses: hotosm/gh-workflows/.github/workflows/[email protected]
with:
image_tags: |
"ghcr.io/${{ github.repository }}:ci-${{ github.ref_name }}"
"ghcr.io/${{ github.repository }}:${{ github.ref_name }}"
invalidate-cache:
runs-on: ubuntu-latest
@@ -31,4 +31,4 @@ jobs:
gh actions-cache delete image-cache-${{ runner.os }} \
-R ${{ github.repository }} \
-B ${{ github.ref_name }} \
---confirm || true
+--confirm || true
24 changes: 10 additions & 14 deletions Dockerfile
@@ -1,5 +1,4 @@
-FROM quay.io/mojodna/gdal:v2.3.x
-LABEL maintainer="[email protected]"
+FROM ghcr.io/osgeo/gdal:ubuntu-full-3.6.0

ARG http_proxy

@@ -14,10 +13,12 @@ RUN apt-get update \
git \
jq \
nfs-common \
+build-essential \
parallel \
-python-pip \
-python-wheel \
-python-setuptools \
+python3 \
+python3-dev \
+python3-pip \
+python3-setuptools \
unzip \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
@@ -34,17 +35,12 @@ ENV VSI_CACHE_SIZE 536870912
ENV LC_ALL C.UTF-8
ENV LANG C.UTF-8

-RUN apt-get update
-RUN apt-get install -y build-essential checkinstall
-RUN apt-get install -y libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev
-RUN apt-get install -y wget
-RUN wget https://www.python.org/ftp/python/3.6.3/Python-3.6.3.tgz
-RUN tar -xvf Python-3.6.3.tgz
-RUN cd Python-3.6.3 && ./configure && make && make install

-RUN pip3 install --upgrade setuptools
-RUN pip3 install rasterio haversine cython awscli
+RUN pip3 install rasterio haversine cython awscli requests

COPY bin/* /opt/marblecutter-tools/bin/

RUN chmod +x /opt/marblecutter-tools/bin/process.sh
+RUN chmod +x /opt/marblecutter-tools/bin/process.py

RUN ln -s /opt/marblecutter-tools/bin/* /usr/local/bin/ && mkdir -p /efs
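For context: the new base image already bundles GDAL's Python bindings, so the source-built Python 3.6.3 toolchain is dropped and only pip packages remain. A quick smoke test one might run inside the built image (a sketch, not part of this commit):

    import rasterio
    import requests
    from osgeo import gdal  # bundled with the ghcr.io/osgeo/gdal images

    # Confirm the GDAL bindings and the pip-installed packages import cleanly.
    print("GDAL:", gdal.__version__)
    print("rasterio:", rasterio.__version__)
    print("requests:", requests.__version__)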
2 changes: 1 addition & 1 deletion bin/build_metadata.py
@@ -27,7 +27,7 @@ def main():
for a in args.meta:
k, v = a.split('=', 1)
if v:
-geojson['properties'][k] = json.loads(v)
+geojson['properties'][k] = json.loads(v, strict=False)

assert incoming['type'] == 'FeatureCollection', \
"Expecting a FeatureCollection GeoJSON object"
2 changes: 1 addition & 1 deletion bin/ingest_single_footprint.sh
@@ -7,7 +7,7 @@ fi

set -e

>&2 echo "Ingesting footprint for ${footprint_uri}"
echo "Ingesting footprint for ${footprint_uri}"

# We're going to say the source is the root of the S3 key after the bucket name
source=$(awk -F '/' '{print $4}' <<< $footprint_uri)
Expand Down
95 changes: 95 additions & 0 deletions bin/process.py
@@ -0,0 +1,95 @@
#!/usr/bin/env python3
# coding=utf-8

import subprocess
import sys
import requests
import json
import threading

# Get command line arguments
command_line_arguments = sys.argv[1:]

# Construct the process command to execute
process_command = ['bash', 'process.sh'] + command_line_arguments

# Get the callback URL from the command line arguments
callback_url = sys.argv[3]

# Function to send a callback request


def send_callback_request(body):
response = requests.post(callback_url, json=body)

# Read a stream line by line, passing each line to a callback


def read_stream(stream, callback):
while True:
line = stream.readline()
if line:
callback(line)
else:
break


# Echo each line of the subprocess's stdout

def stdout_callback(line):
print(line)

# Parse a chunk of stderr, extracting #*...*# status messages


def stderr_callback(stderr_chunk):
status_message_buffer = ''
inside_status_tag = False
error_message = ''

i = 0
while i < len(stderr_chunk):
# Check if the current character marks the start of a status message tag
if stderr_chunk[i:i+2] == '#*':
inside_status_tag = True
i += 2
# Check if the current character marks the end of a status message tag
elif stderr_chunk[i:i+2] == '*#':
inside_status_tag = False
# Extract the status message from the buffer and send a callback request if necessary
text = status_message_buffer.strip()
if text:
print('Found status message:', text, flush=True)
status_update_body = json.loads(text)
if status_update_body["status"] == "failed":
status_update_body["message"] = error_message
send_callback_request(status_update_body)
status_message_buffer = ''
i += 2
# If inside a status message tag, append the current character to the buffer
elif inside_status_tag:
status_message_buffer += stderr_chunk[i]
i += 1
# If not inside a status message tag, append the current character to the error message buffer
else:
error_message += stderr_chunk[i]
i += 1


# Start the subprocess and capture its standard error output
process_instance = subprocess.Popen(
process_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)

# Create separate threads for reading stdout and stderr
stdout_thread = threading.Thread(target=read_stream, args=(
process_instance.stdout, stdout_callback))
stderr_thread = threading.Thread(target=read_stream, args=(
process_instance.stderr, stderr_callback))

# Start the threads
stdout_thread.start()
stderr_thread.start()

# Wait for both threads to finish
stdout_thread.join()
stderr_thread.join()
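In short, the wrapper expects process.sh to embed status updates in stderr between #* and *# as JSON, with everything outside the tags accumulated as error text. A condensed, self-contained sketch of that extraction, run on a made-up stderr line:

    import json

    def extract_status_messages(chunk):
        # Scan for #*...*# spans and parse each as JSON, as stderr_callback does.
        buffer, inside, found = '', False, []
        i = 0
        while i < len(chunk):
            if chunk[i:i+2] == '#*':
                inside = True
                i += 2
            elif chunk[i:i+2] == '*#':
                inside = False
                found.append(json.loads(buffer.strip()))
                buffer = ''
                i += 2
            else:
                if inside:
                    buffer += chunk[i]
                i += 1
        return found

    sample = 'noise #*{ "status": "processing", "message": "" }*# more noise'
    print(extract_status_messages(sample))  # [{'status': 'processing', 'message': ''}]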
53 changes: 28 additions & 25 deletions bin/process.sh
@@ -32,7 +32,7 @@ THUMBNAIL_SIZE=${THUMBNAIL_SIZE:-300}
# support for S3-compatible services (for GDAL + transcode.sh)
# TODO support AWS_HTTPS to match GDAL
export AWS_S3_ENDPOINT_SCHEME=${AWS_S3_ENDPOINT_SCHEME:-https://}
-export AWS_S3_ENDPOINT=${AWS_S3_ENDPOINT:-s3.amazonaws.com}
+export AWS_S3_ENDPOINT=$AWS_REGION".s3.amazonaws.com"

if [[ ! -z "$DEBUG" ]]; then
set -x
@@ -74,26 +74,17 @@ function cleanup() {
function update_status() {
set +u

if [[ ! -z "$callback_url" ]]; then
local status=$1
local message=$2
local status=$1
local message=$2

set +e
cat <<EOF | curl -s -X POST -d @- -H "Content-Type: application/json" "${callback_url}"
{
"status": "${status}",
"message": "${message}"
}
EOF
set -e
fi
>&2 echo "#*{ \"status\": \"${status}\", \"message\": \"${message}\" }*#"

set -u
}

function mark_failed() {
if [[ ! -z "$callback_url" ]]; then
>&2 echo "Failed. Telling ${callback_url}"
echo "Failed. Telling ${callback_url}"
update_status failed
fi
}
@@ -176,17 +167,29 @@ function download() {
local source=$2

if [[ "$input" =~ ^s3:// ]]; then
>&2 echo "Downloading $input from S3..."
echo "Downloading $input from S3..."
update_status status "Downloading $input from S3..."
aws s3 cp --endpoint-url ${AWS_S3_ENDPOINT_SCHEME}${AWS_S3_ENDPOINT} "$input" "$source"
elif [[ "$input" =~ s3\.amazonaws\.com ]]; then
>&2 echo "Downloading $input from S3 over HTTP..."
echo "Downloading $input from S3 over HTTP..."
update_status status "Downloading $input from S3 over HTTP..."
curl -sfL "$input" -o "$source"
curl -sfSL "$input" -o "$source" || {
retval=$?
if [ $retval -eq 22 ]; then
cleanup_on_failure $LINENO
exit 1
fi
}
elif [[ "$input" =~ ^https?:// && ! "$input" =~ s3\.amazonaws\.com ]]; then
>&2 echo "Downloading $input..."
echo "Downloading $input..."
update_status status "Downloading $input..."
curl -sfL "$input" -o "$source"
curl -sfSL "$input" -o "$source" || {
retval=$?
if [ $retval -eq 22 ]; then
cleanup_on_failure $LINENO
exit 1
fi
}
else
cp "$input" "$source"
fi
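With -f, curl exits 22 on an HTTP 4xx/5xx, which the new handlers treat as fatal. A rough Python analogue of this download-or-fail step, using the requests dependency this commit adds to the image (the function name is illustrative, not from the repo):

    import requests

    def download_or_fail(url, dest):
        # Equivalent in spirit to `curl -sfSL "$url" -o "$dest"`:
        # follow redirects, raise on HTTP 4xx/5xx instead of saving an error page.
        response = requests.get(url, allow_redirects=True, timeout=60)
        response.raise_for_status()
        with open(dest, 'wb') as fh:
            fh.write(response.content)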
@@ -215,7 +218,7 @@ intermediate=${base}-intermediate.tif
to_clean+=($intermediate ${source}.aux.xml)
gdal_output=$(sed 's|s3://\([^/]*\)/|/vsis3/\1/|' <<< $output)

>&2 echo "Processing ${input} into ${output}.{json,png,tif}..."
echo "Processing ${input} into ${output}.{json,png,tif}..."
update_status processing

# 0. download source
@@ -225,7 +228,7 @@ download "$input" "$source"
if [[ "$input" =~ \.img ]]; then
set +e

>&2 echo "Attempting to download .ige companion..."
echo "Attempting to download .ige companion..."
download "${input/%.img/.ige}" "${source/%.img/.ige}"

set -e
@@ -240,7 +243,7 @@ if [[ "$input" =~ ^(s3|https?):// ]]; then
fi

# 6. create thumbnail
>&2 echo "Generating thumbnail..."
echo "Generating thumbnail..."
update_status status "Generating thumbnail..."
thumb=${base}.png
to_clean+=($thumb ${thumb}.aux.xml ${thumb}.msk)
@@ -279,7 +282,7 @@ gdal_translate \
-outsize $target_width $target_height

# 5. create footprint
>&2 echo "Generating footprint..."
echo "Generating footprint..."
update_status status "Generating footprint..."
info=$(rio info $intermediate)
nodata=$(jq -r .nodata <<< $info)
@@ -330,7 +333,7 @@ meta=$(< $footprint)
if [[ "$output" =~ ^s3:// ]]; then
update_aws_credentials

>&2 echo "Uploading..."
echo "Uploading..."
update_status status "Uploading..."
aws s3 cp --acl public-read $intermediate "${output}.tif"
aws s3 cp --acl public-read $footprint "${output}.json"
@@ -349,4 +352,4 @@ fi

rm -f ${intermediate}*

>&2 echo "Done."
echo "Done."