From a3e34bbbe6e5f4828ec63e3f764c60ce40ad3269 Mon Sep 17 00:00:00 2001 From: maxgrossman Date: Fri, 15 Sep 2017 17:09:53 -0400 Subject: [PATCH] add qgis geometry cleaning modules the 'delete holes' module has been added to the pipeline. next to be added is the initial cleaning module --- {processing/db => db}/connection.js | 4 +- db/local/index.js | 7 ++ docker/clean-geometries/Dockerfile | 17 ++++ docker/clean-geometries/clean-geom.py | 23 +++++ docker/clean-geometries/run.sh | 6 ++ docker/delete-holes/Dockerfile | 8 ++ docker/delete-holes/delete-holes.py | 23 +++++ docker/delete-holes/run.sh | 7 ++ package.json | 2 +- processing/{a-dissolve.sh => b-dissolve.sh} | 10 ++- processing/b-reproject.sh | 13 --- processing/c-delete-holes.sh | 16 ++++ ...ojson-spec.sh => d-update-geojson-spec.sh} | 0 processing/e-insert-tables.js | 85 ------------------- ...-simplify-props.js => e-simplify-props.js} | 0 processing/f-insert-tables.js | 85 +++++++++++++++++++ 16 files changed, 202 insertions(+), 104 deletions(-) rename {processing/db => db}/connection.js (80%) create mode 100644 db/local/index.js create mode 100644 docker/clean-geometries/Dockerfile create mode 100644 docker/clean-geometries/clean-geom.py create mode 100644 docker/clean-geometries/run.sh create mode 100644 docker/delete-holes/Dockerfile create mode 100644 docker/delete-holes/delete-holes.py create mode 100644 docker/delete-holes/run.sh rename processing/{a-dissolve.sh => b-dissolve.sh} (76%) delete mode 100755 processing/b-reproject.sh create mode 100755 processing/c-delete-holes.sh rename processing/{c-update-geojson-spec.sh => d-update-geojson-spec.sh} (100%) delete mode 100644 processing/e-insert-tables.js rename processing/{d-simplify-props.js => e-simplify-props.js} (100%) create mode 100755 processing/f-insert-tables.js diff --git a/processing/db/connection.js b/db/connection.js similarity index 80% rename from processing/db/connection.js rename to db/connection.js index 6d9163d..cbd0524 100644 --- 
a/processing/db/connection.js +++ b/db/connection.js @@ -1,9 +1,9 @@ 'use strict'; var assert = require('assert'); -// set the db urls base on environment +// set the db urls based on environment var DEFAULT_ENVIRONMENT = 'development'; -var environment = process.env.MACROCOSM_ENV || DEFAULT_ENVIRONMENT; +var environment = process.env.ORMA_ENV || DEFAULT_ENVIRONMENT; var connection = process.env.DATABASE_URL || require('./local').connection[environment]; assert.ok(connection, 'Connection is undefined; check DATABASE_URL or local.js'); diff --git a/db/local/index.js b/db/local/index.js new file mode 100644 index 0000000..48d6b24 --- /dev/null +++ b/db/local/index.js @@ -0,0 +1,7 @@ +module.exports = { + connection: { + 'development': '', + 'staging': '', + 'production': '' + } +} diff --git a/docker/clean-geometries/Dockerfile b/docker/clean-geometries/Dockerfile new file mode 100644 index 0000000..383f8a5 --- /dev/null +++ b/docker/clean-geometries/Dockerfile @@ -0,0 +1,17 @@ +# Synopsis: Dockerfile that builds an image that allows headless use of qgis processing module +# uses https://hub.docker.com/r/nuest/qgis-model/~/dockerfile/ +# note, the entrypoint of the image is a custom script, hence the forced use of /bin/bash +# entrypoint in docker run cmd + +FROM nuest/qgis-model:xenial-multimodel +# copy over python scripts and initial data. 
+COPY ./clean-geom.py /workspace/clean-geom.py +COPY ./vietnam-communes.cpg /workspace/vietnam-communes.cpg +COPY ./vietnam-communes.dbf /workspace/vietnam-communes.dbf +COPY ./vietnam-communes.prj /workspace/vietnam-communes.prj +COPY ./vietnam-communes.qpj /workspace/vietnam-communes.qpj +COPY ./vietnam-communes.shp /workspace/vietnam-communes.shp +COPY ./vietnam-communes.shx /workspace/vietnam-communes.shx +COPY ./run.sh /workspace/run.sh +# on entry into container, run the run.sh script +ENTRYPOINT ["/bin/bash", "/workspace/run.sh"] diff --git a/docker/clean-geometries/clean-geom.py b/docker/clean-geometries/clean-geom.py new file mode 100644 index 0000000..24ba992 --- /dev/null +++ b/docker/clean-geometries/clean-geom.py @@ -0,0 +1,23 @@ +# Synopsis: cleans admin geometries using the grass gis v.clean alg available with qgis install +# sys is used primarily for adding qgis utils to path +import sys +import os +# the following qgis modules import and order reference the following +# https://github.com/nuest/docker-qgis-model/blob/master/workspace/example/model.py#L20 +from qgis.core import * +import qgis.utils +# to use processing script a qgis app needs to be initialized +app = QgsApplication([], True) +QgsApplication.setPrefixPath('/usr', True) +QgsApplication.initQgis() +# append processing plugin to system path +sys.path.append('/usr/share/qgis/python/plugins') +# import, then initialize the processing object +from processing.core.Processing import Processing +Processing.initialize() +import processing +# set path to inputs and outputs +input_communes = os.path.join(os.getcwd(), sys.argv[1]) +output_communes = os.path.join(os.getcwd(), sys.argv[2] + '.shp') +# clean the geometries +processing.runalg('grass:v.clean',input_communes,0, 0.1,'205952.54923,985984.624375,929508.401261,2586975.43865',-1, 0.0001,output_communes) diff --git a/docker/clean-geometries/run.sh b/docker/clean-geometries/run.sh new file mode 100644 index 0000000..bcacaf5 
--- /dev/null +++ b/docker/clean-geometries/run.sh @@ -0,0 +1,6 @@ +# cd to workspace dir +cd /workspace +# run qgis process in python while keeping qgis 'headless', or put otherwise access and use qgis processing modules +# without running the qgis gui +# clean the geometry for the initial communes file +xvfb-run -e ${XVFB_LOGFILE} python clean-geom.py vietnam-communes.shp vietnam-communes-clean-geom diff --git a/docker/delete-holes/Dockerfile b/docker/delete-holes/Dockerfile new file mode 100644 index 0000000..4e636ab --- /dev/null +++ b/docker/delete-holes/Dockerfile @@ -0,0 +1,8 @@ +FROM nuest/qgis-model:xenial-multimodel +COPY ./delete-holes.py /workspace/delete-holes.py +COPY ./run.sh /workspace/run.sh +COPY ./vietnam-communes.geojson /workspace/vietnam-communes.geojson +COPY ./vietnam-district.geojson /workspace/vietnam-district.geojson +COPY ./vietnam-province.geojson /workspace/vietnam-province.geojson +# on entry into container, run the run.sh script +ENTRYPOINT ["/bin/bash", "/workspace/run.sh"] diff --git a/docker/delete-holes/delete-holes.py b/docker/delete-holes/delete-holes.py new file mode 100644 index 0000000..c78c0f5 --- /dev/null +++ b/docker/delete-holes/delete-holes.py @@ -0,0 +1,23 @@ +# Synopsis: fills holes in admin geometries using the qgis fillholes alg available with qgis install +# sys is used primarily for adding qgis utils to path +import sys +import os +# the following qgis modules import and order reference the following +# https://github.com/nuest/docker-qgis-model/blob/master/workspace/example/model.py#L20 +from qgis.core import * +import qgis.utils +# to use processing script a qgis app needs to be initialized +app = QgsApplication([], True) +QgsApplication.setPrefixPath('/usr', True) +QgsApplication.initQgis() +# append processing plugin to system path +sys.path.append('/usr/share/qgis/python/plugins') +# import, then initialize the processing object +from processing.core.Processing import Processing +Processing.initialize() +import 
processing +# set path to inputs and outputs +input_communes = os.path.join(os.getcwd(), sys.argv[1]) +output_communes = os.path.join(os.getcwd(), sys.argv[2] + '.geojson') +# fill the holes in the geometries +processing.runalg('qgis:fillholes',input_communes, 100000, output_communes) diff --git a/docker/delete-holes/run.sh b/docker/delete-holes/run.sh new file mode 100644 index 0000000..24f1584 --- /dev/null +++ b/docker/delete-holes/run.sh @@ -0,0 +1,7 @@ +# run qgis process in python while keeping qgis 'headless', or put otherwise access and use qgis processing modules +# without running the qgis gui +cd /workspace +# remove holes for all three admin files +xvfb-run -e ${XVFB_LOGFILE} python delete-holes.py vietnam-province.geojson vietnam-province-filled-holes +xvfb-run -e ${XVFB_LOGFILE} python delete-holes.py vietnam-communes.geojson vietnam-communes-filled-holes +xvfb-run -e ${XVFB_LOGFILE} python delete-holes.py vietnam-district.geojson vietnam-district-filled-holes diff --git a/package.json b/package.json index 1522c2b..68d6e6c 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "author": "maxgrossman ", "license": "MIT", "scripts": { - "start": "chmod +x ./admin-tables-pipe.sh" + "start": "chmod +x ./admin-tables-pipe.sh && ./admin-tables-pipe.sh" }, "dependencies": { "async": "^2.5.0", diff --git a/processing/a-dissolve.sh b/processing/b-dissolve.sh similarity index 76% rename from processing/a-dissolve.sh rename to processing/b-dissolve.sh index d52af9f..8bdfd3f 100755 --- a/processing/a-dissolve.sh +++ b/processing/b-dissolve.sh @@ -15,13 +15,17 @@ do OUTPUT=${1}/output/vietnam-${ADMIN_ARRAY[0]}.geojson # make ${DISSOLVE_FIELD} per the current admin's dissolve field DISSOLVE_FIELD=${ADMIN_ARRAY[1]} - # dissolve on admin field with ogr2ogr and write output as a geojson + # dissolve on admin field with ogr2ogr and write output as a geojson; also reproject from UTM to wgs84 # this comman creates a new geojson where features are geometries that share the same 
${DISSOLVE_FIELD} # 'ST_UNION' merges geometries. # 'GROUP BY' tells gdal which gemetries to merge together - ogr2ogr -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -dialect sqlite -sql $'SELECT ST_Union(geometry), * FROM "'"$INPUT_NAME"$'" GROUP BY '"$DISSOLVE_FIELD" + # -t_srs is a flag for reprojection + # EPSG:4326 is the WGS84 EPSG code + # http://spatialreference.org/ref/epsg/wgs-84/ + ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -dialect sqlite -sql $'SELECT ST_Union(geometry), * FROM "'"$INPUT_NAME"$'" GROUP BY '"$DISSOLVE_FIELD" done # name of geojson output file OUT_GJSN=${1}/output/${INPUT_NAME}.geojson # since communes don't need to be dissolved, do a simple shp->geojson conversion -ogr2ogr -f 'GeoJSON' "${INPUT}" "${IN_SHP}" +# make sure also to reproject +ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUT_GJSN}" "${INPUT}" diff --git a/processing/b-reproject.sh b/processing/b-reproject.sh deleted file mode 100755 index 1285c07..0000000 --- a/processing/b-reproject.sh +++ /dev/null @@ -1,13 +0,0 @@ -# Synopysis: reproject each admin geojson from UTM to WGS84 -for ADMIN in communes district province -do - # generate the input name for the current ${ADMIN} file - INPUT=${1}/tmp/vietnam-${ADMIN}.geojson - # generate the output name for the current ${ADMIN} file - OUTPUT=${1}/output/vietnam-${ADMIN}-wgs84.geojson - # reproject ${INPUT} to wgs84 with ogr2ogr - # -t_srs is a flag for reprojection - # EPSG:4326 is the WGS84 EPSG code - # http://spatialreference.org/ref/epsg/wgs-84/ - ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -done diff --git a/processing/c-delete-holes.sh b/processing/c-delete-holes.sh new file mode 100755 index 0000000..666ec89 --- /dev/null +++ b/processing/c-delete-holes.sh @@ -0,0 +1,16 @@ +# Synopsis: removes holes from dissolved polygons + +# take what is in the delete-holes tmp directory and copy it into the ./docker/delete-holes folder +cp ${1}/tmp/* ./docker/delete-holes +# build delete holes 
container +docker build -t 'qgis_headless' ./docker/delete-holes +# run the docker container, entering at run.sh +docker run -it qgis_headless +# get admin areas from container and copy them over to the output folder of the process +# using the docker cp command, copying from the most recently created container to the output folder +# `docker ps --latest -q` grabs the most recent container +docker cp `docker ps --latest -q`:workspace/vietnam-communes-filled-holes.geojson ${1}/output/vietnam-communes-filled-holes.geojson +docker cp `docker ps --latest -q`:workspace/vietnam-district-filled-holes.geojson ${1}/output/vietnam-district-filled-holes.geojson +docker cp `docker ps --latest -q`:workspace/vietnam-province-filled-holes.geojson ${1}/output/vietnam-province-filled-holes.geojson +# clean up the docker directory +rm -f ./docker/delete-holes/vietnam* diff --git a/processing/c-update-geojson-spec.sh b/processing/d-update-geojson-spec.sh similarity index 100% rename from processing/c-update-geojson-spec.sh rename to processing/d-update-geojson-spec.sh diff --git a/processing/e-insert-tables.js b/processing/e-insert-tables.js deleted file mode 100644 index c2ee251..0000000 --- a/processing/e-insert-tables.js +++ /dev/null @@ -1,85 +0,0 @@ -/** - * @file reads streaming admin geojson and 'inserts' each feature 'into' matching admin postgis table - */ - -// these modules are needed for streaming geojsons -var createReadStream = require('fs').createReadStream; -var createWriteStream = require('fs').createWriteStream; -var readdirSync = require('fs').readdirSync; -var geojsonStream = require('geojson-stream'); -var parser = geojsonStream.parse(); -var stringifier = geojsonStream.stringify(); -// module to read path -var path = require('path'); -// parallel allows for reading each admin geojson stream asynchronously -var parallel = require('async').parallel; -// knex creates a knex obj that links to the current environmnets database -var knex = require('./db/connection/.js') 
-// postgis is a knex extension to generate postgis statements -var postgis = require('knex-postgis'); -// helps split single-line json into chunked-by-line geojson as mentinoed in d-simplify-props.js -var split = require('split'); -// directory with geojsons -var adminPath = `data/processing/d-simplify-props/tmp`; -// array including elements with each file in that directory -var admins = readdirSync(adminPath) -// st is short for spatial type. spatial type is the prefix for postgis functions that allow for spatial sql statements -// see https://postgis.net/docs/reference.html -var st = postgis(knex); - -// create list of async functions to pass to parallel -const adminTasks = admins.map((admin) => { - return function(cb) { - // base name mirrors admin name - var basename = admin.split('-')[1] - // here's the path to the current admin file - var adminFile = path.join(adminPath, admin) - // stream of this admin file - var adminFileStream = createReadStream(adminFile) - // pipe split for the lines needed to send along to the geojson parser - .pipe(split()) - // the geojson parser for parsing the feature collection - .pipe(parser) - .on('data', (feature) => { - // for each feature, insert it into the table using the insertIntoTable function - insertIntoTable(feature, basename) - }) - // fire a callback on end event - .on('end', () => { cb(null, null) }) - } -}); - -/** - * transforms feature into postgis table row and inserts it into the proper admin table - * - * @param {object} feature geojson feature - * @param {string} admin admin name - */ -function insertIntoTable (feature, admin) { - // generate properties and geometry objects from feature object - const properties = feature.properties; - const geometry = feature.geometry; - const statement = db.insert({ - // shared identifier for each row in admin table - type: admin, - // numeric id for current admin unit - id: properties.id, - // numeric id for currrent admin unit's parent (for instance a commune's parent 
district) - // this is helpful for future spatial analysis - parent_id: properties.p_id, - // admin unit geometry - geo: st.geomFromGeoJSON(geometry), - // english name of admin unit - name_en: properties.en_name, - // vietnamese name of admin unit - name_vn: '' - }) - // method that inserts the insert statement into its correct table - .into(`${admin}-table`).toString(); -} - -// run tasks in parallel -parallel(adminTasks, (err, res) => { - // do nothing on result - if (!err) {} -}); diff --git a/processing/d-simplify-props.js b/processing/e-simplify-props.js similarity index 100% rename from processing/d-simplify-props.js rename to processing/e-simplify-props.js diff --git a/processing/f-insert-tables.js b/processing/f-insert-tables.js new file mode 100755 index 0000000..854048f --- /dev/null +++ b/processing/f-insert-tables.js @@ -0,0 +1,85 @@ +/** + * @file reads streaming admin geojson and 'inserts' each feature 'into' matching admin postgis table + */ + +// // these modules are needed for streaming geojsons +// var createReadStream = require('fs').createReadStream; +// var createWriteStream = require('fs').createWriteStream; +// var readdirSync = require('fs').readdirSync; +// var geojsonStream = require('geojson-stream'); +// var parser = geojsonStream.parse(); +// var stringifier = geojsonStream.stringify(); +// // module to read path +// var path = require('path'); +// // parallel allows for reading each admin geojson stream asynchronously +// var parallel = require('async').parallel; +// // knex creates a knex obj that links to the current environment's database +// var knex = require('./db/connection/.js') +// // postgis is a knex extension to generate postgis statements +// var postgis = require('knex-postgis'); +// // helps split single-line json into chunked-by-line geojson as mentioned in e-simplify-props.js +// var split = require('split'); +// // directory with geojsons +// var adminPath = `data/processing/d-simplify-props/tmp`; +// // array 
including elements with each file in that directory +// var admins = readdirSync(adminPath) +// // st is short for spatial type. spatial type is the prefix for postgis functions that allow for spatial sql statements +// // see https://postgis.net/docs/reference.html +// var st = postgis(knex); +// +// // create list of async functions to pass to parallel +// const adminTasks = admins.map((admin) => { +// return function(cb) { +// // base name mirrors admin name +// var basename = admin.split('-')[1] +// // here's the path to the current admin file +// var adminFile = path.join(adminPath, admin) +// // stream of this admin file +// var adminFileStream = createReadStream(adminFile) +// // pipe split for the lines needed to send along to the geojson parser +// .pipe(split()) +// // the geojson parser for parsing the feature collection +// .pipe(parser) +// .on('data', (feature) => { +// // for each feature, insert it into the table using the insertIntoTable function +// insertIntoTable(feature, basename) +// }) +// // fire a callback on end event +// .on('end', () => { cb(null, null) }) +// } +// }); +// +// /** +// * transforms feature into postgis table row and inserts it into the proper admin table +// * +// * @param {object} feature geojson feature +// * @param {string} admin admin name +// */ +// function insertIntoTable (feature, admin) { +// // generate properties and geometry objects from feature object +// const properties = feature.properties; +// const geometry = feature.geometry; +// const statement = db.insert({ +// // shared identifier for each row in admin table +// type: admin, +// // numeric id for current admin unit +// id: properties.id, +// // numeric id for currrent admin unit's parent (for instance a commune's parent district) +// // this is helpful for future spatial analysis +// parent_id: properties.p_id, +// // admin unit geometry +// geo: st.geomFromGeoJSON(geometry), +// // english name of admin unit +// name_en: properties.en_name, +// // 
vietnamese name of admin unit +// name_vn: '' +// }) +// // method that inserts the insert statement into its correct table +// .into(`${admin}-table`).toString(); +// } +// +// // run tasks in parallel +// parallel(adminTasks, (err, res) => { +// // do nothing on result +// if (!err) {} +// });