Skip to content

Commit

Permalink
Release 0.1.6 (#9)
Browse files Browse the repository at this point in the history
* Generate codebase from spec 0.1.6

* Complete docker-compose requirements to deploy the stack

* The server can now deidentify clinical notes!!

* Update CI/CD
  • Loading branch information
tschaffter authored Oct 19, 2020
1 parent 1fffffe commit 4c3faed
Show file tree
Hide file tree
Showing 18 changed files with 395 additions and 234 deletions.
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ updates:
- "tschaffter"

- package-ecosystem: "docker"
directory: "/server"
schedule:
interval: "weekly"
target-branch: "develop"
reviewers:
- "tschaffter"

- package-ecosystem: "pip"
directory: "/server"
schedule:
interval: "weekly"
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
uses: docker://hadolint/hadolint:latest
with:
entrypoint: hadolint
args: client/Dockerfile
args: server/Dockerfile client/Dockerfile

test:
needs: [lint]
Expand Down Expand Up @@ -102,7 +102,7 @@ jobs:
- name: Prepare
id: prep
run: |
DOCKER_IMAGE=nlpsandbox/deidentifier-service
DOCKER_IMAGE=nlpsandbox/deidentifier
VERSION=noop
PUSH=false
if [ "${{ github.event_name }}" = "schedule" ]; then
Expand Down Expand Up @@ -146,8 +146,8 @@ jobs:
id: docker_build
uses: docker/build-push-action@v2
with:
context: client
file: client/Dockerfile
context: server
file: server/Dockerfile
platforms: linux/amd64
push: ${{ steps.prep.outputs.push }}
tags: ${{ steps.prep.outputs.tags }}
Expand Down
17 changes: 14 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
# NLP Sandbox Deidentifier

<!-- [![GitHub Stars](https://img.shields.io/github/stars/Sage-Bionetworks/nlp-sandbox-deidentifier.svg?color=94398d&labelColor=555555&logoColor=ffffff&style=for-the-badge&logo=github)](https://github.com/Sage-Bionetworks/nlp-sandbox-deidentifier) -->
[![Docker Pulls](https://img.shields.io/docker/pulls/nlpsandbox/deidentifier-shiny-app.svg?color=94398d&labelColor=555555&logoColor=ffffff&style=for-the-badge&label=pulls&logo=docker)](https://hub.docker.com/r/Sage-Bionetworks/deidentifier-shiny-app)
[![GitHub CI](https://img.shields.io/github/workflow/status/Sage-Bionetworks/nlp-sandbox-deidentifier/ci.svg?color=94398d&labelColor=555555&logoColor=ffffff&style=for-the-badge&logo=github)](https://github.com/Sage-Bionetworks/nlp-sandbox-deidentifier)
[![GitHub Release](https://img.shields.io/github/release/Sage-Bionetworks/nlp-sandbox-deidentifier.svg?include_prereleases&color=94398d&labelColor=555555&logoColor=ffffff&style=for-the-badge&logo=github)](https://github.com/Sage-Bionetworks/nlp-sandbox-deidentifier/releases)
[![Docker Pulls](https://img.shields.io/docker/pulls/nlpsandbox/date-annotator-example.svg?color=94398d&labelColor=555555&logoColor=ffffff&style=for-the-badge&label=pulls&logo=docker)](https://hub.docker.com/r/nlpsandbox/date-annotator-example)
[![GitHub License](https://img.shields.io/github/license/Sage-Bionetworks/nlp-sandbox-deidentifier.svg?color=94398d&labelColor=555555&logoColor=ffffff&style=for-the-badge&logo=github)](https://github.com/Sage-Bionetworks/nlp-sandbox-deidentifier)

NLP Sandbox de-identification client and server

## Specification
## Specification

TBA

## Usage

The command below starts the Deidentifier stack locally.

docker-compose up

When running, the Deidentifier stack provides a web interface (http://localhost:8080)
that you can use to deidentify single or multiple clinical notes.
36 changes: 17 additions & 19 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,31 @@ version: "3.8"

services:
date-annotator:
image: nlpsandbox/date-annotator-example
image: nlpsandbox/date-annotator-example:0.1.6
container_name: date-annotator
restart: always
ports:
- 9090:8080
- 9000:8080

# person-name-annotator:
# image: nlpsandbox/person-name-annotator-example
# container_name: person-name-annotator
# restart: always
# ports:
# - 9091:8080

# physical-address-annotator:
# image: nlpsandbox/person-name-annotator-example
# container_name: physical-address-annotator
# restart: always
# ports:
# - 9092:8080
person-name-annotator:
image: nlpsandbox/person-name-annotator-example:0.1.6
container_name: person-name-annotator
restart: always
ports:
- 9001:8080

deidentifier-server:
# image: nlpsandbox/deidentifier-shiny-app
# image: nlpsandbox/deidentifier
build:
context: server
dockerfile: Dockerfile
container_name: deidentifier-server
container_name: deidentifier
restart: always
ports:
- 8081:8080
- 9002:8080
depends_on:
- date-annotator
- person-name-annotator

deidentifier-client:
# image: nlpsandbox/deidentifier-shiny-app
Expand All @@ -40,4 +36,6 @@ services:
container_name: deidentifier-client
restart: always
ports:
- 8080:3838
- 8080:3838
depends_on:
- deidentifier-server
47 changes: 38 additions & 9 deletions server/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,45 @@
FROM python:3-alpine
FROM python:3.8.5-slim-buster

RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
ARG S6_VERSION
ENV S6_VERSION=${S6_VERSION:-v2.1.0.0}
ENV S6_BEHAVIOUR_IF_STAGE2_FAILS=2
ENV PIP_NO_CACHE_DIR=off
ENV APP_USER=app
ENV APP_DIR=/opt/app

COPY requirements.txt /usr/src/app/
# Safer bash scripts with 'set -euxo pipefail'
SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]

RUN pip3 install --no-cache-dir -r requirements.txt
# Install system dependencies
# hadolint ignore=DL3008
RUN apt-get update -qq -y \
&& apt-get install --no-install-recommends -qq -y \
curl \
unzip \
&& apt-get -y autoclean \
&& apt-get -y autoremove \
&& rm -rf /var/lib/apt/lists/*

COPY . /usr/src/app
# Set up S6 init system
RUN curl -fsSL https://github.com/just-containers/s6-overlay/releases/download/${S6_VERSION}/s6-overlay-amd64.tar.gz \
-o /tmp/s6-overlay.tar.gz \
&& tar xzf /tmp/s6-overlay.tar.gz --directory / \
&& rm -fr /tmp/s6-overlay.tar.gz

EXPOSE 8080
# Add app user
RUN useradd -m -s /bin/bash ${APP_USER} \
&& echo "${APP_USER}:${APP_USER}" | chpasswd

# Copy server files
COPY . ${APP_DIR}
RUN chown -R ${APP_USER}:${APP_USER} ${APP_DIR}

# Install Python dependencies
RUN pip install -r ${APP_DIR}/requirements.txt

ENTRYPOINT ["python3"]
# Add s6 scripts
COPY root /

EXPOSE 8080

CMD ["-m", "openapi_server"]
ENTRYPOINT ["/init"]
57 changes: 54 additions & 3 deletions server/openapi_server/controllers/deidentified_notes_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import connexion
import six
from flask import jsonify
import requests

from openapi_server.models.error import Error # noqa: E501
from openapi_server.models.note import Note # noqa: E501
Expand All @@ -11,11 +13,60 @@ def deidentified_notes_read_all(note=None): # noqa: E501
Returns the deidentified notes # noqa: E501
:param note:
:param note:
:type note: list | bytes
:rtype: List[Note]
"""
res = []

# for testing
dates_url = "http://localhost:9000/api/v1/dates"
person_names_url = "http://localhost:9001/api/v1/person-names"

requests_session = requests.session()
requests_session.headers.update({'Content-Type': 'application/json'})
requests_session.headers.update({'charset':'utf-8'})

if connexion.request.is_json:
note = [Note.from_dict(d) for d in connexion.request.get_json()] # noqa: E501
return 'do some magic!'
dates = []
person_names = []
notes = connexion.request.get_json()

# Get date annotations
response = requests_session.post(url=dates_url, json=notes)
if response.status_code == 200:
dates = response.json()

# Get person name annotations
response = requests_session.post(url=person_names_url, json=notes)
if response.status_code == 200:
person_names = response.json()

# Create deidentified notes
for note in notes:
note_id = note['id']
res.append(deidentify_note(note,
[d for d in dates if d['noteId'] == note_id],
[d for d in person_names if d['noteId'] == note_id]))

return jsonify(res)


def deidentify_note(note, dates, person_names):
    """
    Returns the deidentified clinical note where annotations are masked with '*'.

    Each annotation is read through its 'start' and 'length' keys, and the
    corresponding span of ``note['text']`` is overwritten with mask
    characters. Spans are clamped to the bounds of the text so that an
    annotation reaching past the end (or starting before index 0) can never
    change the length of the note or mask the wrong region.

    :param note: dict holding the note; its 'text' entry is replaced in place
    :param dates: iterable of date annotation dicts ('start', 'length')
    :param person_names: iterable of person name annotation dicts
        ('start', 'length')
    :return: the same ``note`` object, with its 'text' masked
    """
    mask_character = '*'
    text = note['text']
    text_length = len(text)

    # Dates and person names are masked the same way, so process both
    # annotation lists in a single pass.
    for annotations in (dates, person_names):
        for annotation in annotations:
            # Clamp the span to the text: masking replaces characters one for
            # one, so offsets of the remaining annotations stay valid.
            start = max(0, annotation['start'])
            end = min(text_length, annotation['start'] + annotation['length'])
            if start >= end:
                # Empty or fully out-of-range span: nothing to mask.
                continue
            text = text[:start] + mask_character * (end - start) + text[end:]

    note['text'] = text
    return note
11 changes: 3 additions & 8 deletions server/openapi_server/controllers/health_controller.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
import connexion
import six
from flask import jsonify

from openapi_server.models.error import Error # noqa: E501
from openapi_server.models.health import Health # noqa: E501
from openapi_server import util


def health():  # noqa: E501
    """Get Health

    Get the health of the API  # noqa: E501

    :rtype: Health
    """
    # The generated placeholder return ('do some magic!') is removed so the
    # real health payload below is actually reached.
    # Alternative using the generated Health model, currently disabled:
    # return jsonify(Health("pass"))
    return jsonify({'status': 'pass'})
2 changes: 0 additions & 2 deletions server/openapi_server/models/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ def id(self, id):
:param id: The id of this Entity.
:type id: int
"""
if id is None:
raise ValueError("Invalid value for `id`, must not be `None`") # noqa: E501

self._id = id

Expand Down
Loading

0 comments on commit 4c3faed

Please sign in to comment.