Skip to content

Commit

Permalink
feat(workflow): summarizer filters for text only input WIP (#134)
Browse files Browse the repository at this point in the history
* feat(workflow): summarizer filters for text only input WIP

* feat(workflow): summarizer filter fix

* feat(workflow): summarizer filter fix

* feat(workflow): summarizer filter fix

* fix(workflow): does not error out if grep does not validate

* fix(workflow): fix test

* fix(workflow): install recent version of file command

* fix(workflow): fix test

* fix(config): updated summarizer image version

* fix(workflow): cleanup
  • Loading branch information
enricorotundo authored May 4, 2023
1 parent 3c1e0a8 commit e267362
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 30 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,10 @@ bin/
.swagger-codegen-ignore
.openapi-generator/
.openapi-generator-ignore

.terraform/*
*.tfstate
*.tfstate.backup
.terraform.lock.hcl
production.tfvars
ops/terraform/*.out
2 changes: 1 addition & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
memory: 8Gi
- id: summarization-job
type: bacalhau
image: ghcr.io/bacalhau-project/amplify/summarization:0.0.2
image: ghcr.io/bacalhau-project/amplify/summarization:0.0.3
entrypoint:
- /usr/local/bin/run
timeout: 10m
Expand Down
11 changes: 11 additions & 0 deletions containers/summarization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ FROM docker.io/huggingface/transformers-cpu:4.18.0
RUN mkdir -p /models
RUN python3 -c 'from transformers import pipeline; pipeline("summarization", model="facebook/bart-large-cnn").save_pretrained("/models/bart-large-cnn")'

# wget is only needed to fetch the file(1) source tarball below.
RUN apt-get update && apt-get install -y --no-install-recommends \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Build file(1) 5.41 from source and expose it as /usr/local/bin/file5.41,
# the binary the summarization `run` script calls for MIME detection.
# Download, build and cleanup happen in a single layer so the tarball and the
# source tree are not baked into the image.
# NOTE(review): /root/filecmd is deliberately kept — the installed binary
# presumably loads libmagic and the magic database from that prefix; confirm
# before removing it.
RUN wget https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/file/1:5.41-3/file_5.41.orig.tar.gz && \
    tar -xf file_5.41.orig.tar.gz && \
    cd file-5.41 && \
    FORCE_UNSAFE_CONFIGURE=1 ./configure --prefix=/root/filecmd/ && \
    make && make install && \
    cd .. && \
    rm -rf file-5.41 file_5.41.orig.tar.gz && \
    cp /root/filecmd/bin/file /usr/local/bin/file5.41 && \
    chmod +x /usr/local/bin/file5.41

RUN mkdir -p /usr/local/bin
ENV PATH="/usr/local/bin:${PATH}"
COPY containers/scripts/run_program.sh /usr/local/bin/run_program.sh
Expand Down
2 changes: 1 addition & 1 deletion containers/summarization/run
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash

run_program.sh 'python3 /usr/local/bin/bart-summarize.py ${input_file} ${output_dir}' /inputs /outputs ;
# Summarize an input only when file5.41 reports its MIME type as text/plain;
# there is no else branch, so any other input is skipped without output.
# NOTE(review): the backslash escapes presumably defer evaluation of the
# command substitution, pipe and semicolons until run_program.sh expands the
# template for each input file — confirm against run_program.sh.
run_program.sh 'if [ \$\(/usr/local/bin/file5.41 --mime --brief ${input_file} \| grep -c text\/plain \) -eq 1 ] \; then python3 /usr/local/bin/bart-summarize.py ${input_file} ${output_dir} \; fi' /inputs /outputs ;
6 changes: 6 additions & 0 deletions containers/summarization/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ main() {
rm -rf $SCRIPT_DIR/outputs
docker run -it --rm -v $SCRIPT_DIR/../test/testdata/images:/inputs -v $SCRIPT_DIR/outputs:/outputs --entrypoint "" $IMAGE run
checkError

# JSON file: not text/plain, so the run must succeed without producing a summary
rm -rf $SCRIPT_DIR/outputs
docker run -it --rm -v $SCRIPT_DIR/../test/testdata/json_blob:/inputs -v $SCRIPT_DIR/outputs:/outputs --entrypoint "" $IMAGE run
checkError
checkFileDoesNotExists "$SCRIPT_DIR/outputs/bafkreibd4mqgydtbi5vuygtti2eiugyxiqjzwsaexvs7ofmqyrsmsvmosi.plain.json"
}

main
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"contract": "0xbb2b8038a1640196fbe3e38816f3e67cba72d940", "token0Reserves": {"block17179339": 171.43612254, "block17179340": 171.43612254, "block17179341": 171.43612254, "block17179342": 171.43612254, "block17179343": 171.43612254, "block17179344": 171.43612254, "block17179345": 171.43612254, "block17179346": 171.43612254, "block17179347": 171.43612254, "block17179348": 171.43612254}, "token1Reserves": {"block17179339": 2627.167074480664, "block17179340": 2627.167074480664, "block17179341": 2627.167074480664, "block17179342": 2627.167074480664, "block17179343": 2627.167074480664, "block17179344": 2627.167074480664, "block17179345": 2627.167074480664, "block17179346": 2627.167074480664, "block17179347": 2627.167074480664, "block17179348": 2627.167074480664}, "token0ReservesUSD": {"block17179339": 4909105.078209809, "block17179340": 4909965.461030417, "block17179341": 4909212.575235665, "block17179342": 4909183.705340615, "block17179343": 4909295.434128495, "block17179344": 4909317.19916909, "block17179345": 4909515.75660146, "block17179346": 4909478.698571932, "block17179347": 4909478.698571932, "block17179348": 4909388.311871055}, "token1ReservesUSD": {"block17179339": 4909105.07820981, "block17179340": 4909965.461030417, "block17179341": 4909212.575235665, "block17179342": 4909183.705340615, "block17179343": 4909295.434128495, "block17179344": 4909317.19916909, "block17179345": 4909515.756601461, "block17179346": 4909478.6985719325, "block17179347": 4909478.6985719325, "block17179348": 4909388.3118710555}, "chainHeightRange": {"begin": 17179339, "end": 17179348}, "timestamp": 1683106583.0}
28 changes: 0 additions & 28 deletions ops/terraform/.terraform/terraform.tfstate

This file was deleted.

0 comments on commit e267362

Please sign in to comment.