Data Fetching #1309

Workflow file for this run

name: Data Fetching
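# Runs on every push, on a daily schedule (19:05 UTC), and on manual workflow_dispatch.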
on:
  push:
  schedule:
    - cron: "5 19 * * *"
  workflow_dispatch:
jobs:
  Fetch-Route-Data:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
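      # Python 3.8 with a pip cache keyed on crawling/requirements.txt to speed up repeat runs.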
      - name: Setup Python environment
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
          architecture: 'x64'
          cache: 'pip'
          cache-dependency-path: crawling/requirements.txt
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r ./crawling/requirements.txt
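      # Run the crawlers in order: per-operator fetchers first (KMB, CTB, NLB,
      # MTR / Light Rail, GMB, ferries), then the GTFS matching, cleansing and
      # merging scripts that combine their outputs. REQUEST_LIMIT is presumably
      # a throttle on concurrent requests; its exact meaning lives in the scripts.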
      - name: Crawling resources
        env:
          REQUEST_LIMIT: "6"
        run: |
          set -o xtrace
          python ./crawling/parseHoliday.py
          python ./crawling/ctb.py
          python ./crawling/kmb.py
          python ./crawling/nlb.py
          python ./crawling/lrtfeeder.py
          python ./crawling/lightRail.py
          python ./crawling/mtr.py
          python ./crawling/parseJourneyTime.py
          python ./crawling/parseGtfs.py
          python ./crawling/parseGtfsEn.py
          python ./crawling/gmb.py
          python ./crawling/sunferry.py
          python ./crawling/fortuneferry.py
          python ./crawling/hkkf.py
          python ./crawling/matchGtfs.py
          python ./crawling/cleansing.py
          python ./crawling/mergeRoutes.py
          python ./crawling/routeCompare.py
          python ./crawling/mtrExits.py
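      # Upload the raw outputs as a run artifact (kept 14 days) even if an
      # earlier step failed, so broken crawls can still be inspected.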
      - name: Archive crawling outputs
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Crawled Files
          retention-days: 14
          path: |
            holiday.json
            routeFareList*
            routeList*
            stopList*
            routeTime.json
            gtfs
            gtfs.json
            routeGtfs*
            gtfs.zip
            ROUTE_BUS.xml
            route-ts/
            exits.mtr.json
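      # Store the MD5 of routeFareList.min.json (hash only, no trailing newline)
      # in routeFareList.md5, presumably so clients can cheaply detect whether
      # the route database has changed before downloading it again.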
      - name: Update MD5
        run: md5sum routeFareList.min.json | cut -f1 -d ' ' | tr -d $'\n' > routeFareList.md5
      - name: create deployment folder
        run: mkdir -p build
      - name: cp files into deployment folder
        run: cp -r routeFareList.json routeFareList.min.json routeFareList.md5 CNAME exits.mtr.json route-ts build/
      - name: cp route-ts into deployment folder
        run: cp -r route-ts build
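      # Publish build/ to the gh-pages branch; clean: false keeps files from
      # earlier deployments instead of deleting anything not present in build/.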
      - name: Update resources
        uses: JamesIves/github-pages-deploy-action@v4
        with:
          folder: build
          commit-message: Update resources
          branch: gh-pages
          clean: false