-
Install dependencies
pip3 install -r requirements.txt
-
Run cluster
docker-compose up -d
-
Install airflow
export AIRFLOW_HOME=/working/dir/airflow/dags
./install-airflow.sh
-
Run pipeline
airflow standalone
-
Change
NAME_NODE_ID
in /crawler/constants.py
to the name node container ID. -
Run the crawler
cd crawler && python3 crawler.py
-
Change
containerId
in /client/run.sh
to the Spark container ID. -
Change
fileName
in /client/run.sh
to the application you want to run. -
Run the client
bash /client/run.sh