From 99f2373429575e8ae319e73f209f885b147054f9 Mon Sep 17 00:00:00 2001 From: maxgrossman Date: Thu, 14 Sep 2017 17:52:43 -0400 Subject: [PATCH] document! --- processing/a-dissolve.sh | 27 ++++++++----- processing/b-reproject.sh | 9 ++++- processing/c-update-geojson-spec.sh | 14 +++++-- processing/d-simplify-props.js | 38 +++++++++++++----- processing/e-insert-tables.js | 60 +++++++++++++++++++++-------- 5 files changed, 108 insertions(+), 40 deletions(-) diff --git a/processing/a-dissolve.sh b/processing/a-dissolve.sh index 1e7193d..d52af9f 100755 --- a/processing/a-dissolve.sh +++ b/processing/a-dissolve.sh @@ -1,20 +1,27 @@ -# input filename + file +# Synopsis: creates district and province level admins by dissolving (merging geometries) of features with same unique id. + +# input name is used to build names of inputs and output files of each process INPUT_NAME=vietnam-communes +# input for dissolve to make district and province level boundaries as well as convert the commune shapefile to geojson INPUT=${1}/tmp/${INPUT_NAME}.shp -# copy input shapefile into tmp directory -# for districts and provinces + their uniq field + +# for both the district and province, create a new geojson that dissolves features on the unique field id supplied +# on the right hand side of the semi-colon for ADMIN in 'district;DISTCODE02' 'province;PROCODE02' do - # split ADMIN into array including admin name and its field + # split ${ADMIN} string on the semi-colon to grab the admin name and field id ADMIN_ARRAY=(${ADMIN//;/ }) - # use admin name to generate output file name + # make the unique output file per the current admin name OUTPUT=${1}/output/vietnam-${ADMIN_ARRAY[0]}.geojson - # set DISSOLVE_FIELD to admin field + # make ${DISSOLVE_FIELD} per the current admin's dissolve field DISSOLVE_FIELD=${ADMIN_ARRAY[1]} - # dissolve on admin field and write to file + # dissolve on admin field with ogr2ogr and write output as a geojson + # this command creates a new geojson where features 
are geometries that share the same ${DISSOLVE_FIELD} + # 'ST_UNION' merges geometries. + # 'GROUP BY' tells gdal which geometries to merge together ogr2ogr -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -dialect sqlite -sql $'SELECT ST_Union(geometry), * FROM "'"$INPUT_NAME"$'" GROUP BY '"$DISSOLVE_FIELD" done -# also convert communes shp to geojosn -IN_SHP=${1}/tmp/${INPUT_NAME}.shp +# name of geojson output file OUT_GJSN=${1}/output/${INPUT_NAME}.geojson -ogr2ogr -f 'GeoJSON' "${OUT_GJSN}" "${IN_SHP}" +# since communes don't need to be dissolved, do a simple shp->geojson conversion +ogr2ogr -f 'GeoJSON' "${OUT_GJSN}" "${INPUT}" diff --git a/processing/b-reproject.sh b/processing/b-reproject.sh index 4a0f425..1285c07 100755 --- a/processing/b-reproject.sh +++ b/processing/b-reproject.sh @@ -1,8 +1,13 @@ +# Synopsis: reproject each admin geojson from UTM to WGS84 for ADMIN in communes district province do - # use admin name to generate output and input file names + # generate the input name for the current ${ADMIN} file INPUT=${1}/tmp/vietnam-${ADMIN}.geojson + # generate the output name for the current ${ADMIN} file OUTPUT=${1}/output/vietnam-${ADMIN}-wgs84.geojson - # reproject to wgs84 + # reproject ${INPUT} to wgs84 with ogr2ogr + # -t_srs is a flag for reprojection + # EPSG:4326 is the WGS84 EPSG code + # http://spatialreference.org/ref/epsg/wgs-84/ ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUTPUT}" "${INPUT}" done diff --git a/processing/c-update-geojson-spec.sh b/processing/c-update-geojson-spec.sh index b9ea392..577667c 100755 --- a/processing/c-update-geojson-spec.sh +++ b/processing/c-update-geojson-spec.sh @@ -1,10 +1,18 @@ -# enforce right-hand rule for polygons +# Synopsis: removes the crs object within geojsons output by b-reproject as well as enforces right hand rule for polygon draw orders. 
+# info about right hand rules and the new GeoJSON spec below +# for right hand, see the winding section here: https://macwright.org/2015/03/23/geojson-second-bite.html +# for the brave, here's the actual spec https://tools.ietf.org/html/rfc7946 + for ADMIN in communes district province do + # generate unique input and output files as it has been done in previous examples INPUT_FILE=${1}/tmp/vietnam-${ADMIN}-wgs84.geojson OUTPUT_FILE=${1}/output/vietnam-${ADMIN}-cleaned.geojson - # remove crs object to match current GeoJSON spec + # remove crs object to match current GeoJSON spec using sed. + # the below command was found in the following place + # https://stackoverflow.com/questions/38028600/how-to-delete-a-json-object-from-json-file-by-sed-command-in-bash (see the mailer example) + # the `-i .org` allows in-place conversion so the ${INPUT_FILE} effectively has its crs removed. sed -i .org '/\"crs\"/ d; /^$/d' ${INPUT_FILE} - # enforce right to left polygons, also to match current spec + # geojson-rewind winds left-to-right wound geojsons right-to-left. 
the right-to-left output is saved to ${OUTPUT_FILE} geojson-rewind ${INPUT_FILE} > ${OUTPUT_FILE} done diff --git a/processing/d-simplify-props.js b/processing/d-simplify-props.js index 17f13cb..34ce5be 100755 --- a/processing/d-simplify-props.js +++ b/processing/d-simplify-props.js @@ -1,35 +1,51 @@ +/** + * @file reads streaming admin geojson and reduces properties to match the schema of the table to which it is going to be written + */ + +// these modules are needed for streaming geojsons var createReadStream = require('fs').createReadStream; var createWriteStream = require('fs').createWriteStream; var readdirSync = require('fs').readdirSync; -var path = require('path'); -var parallel = require('async').parallel; - -var baseDir = 'data/processing/d-simplify-props' - -// streams to read and write geojsons var geojsonStream = require('geojson-stream'); var parser = geojsonStream.parse(); var stringifier = geojsonStream.stringify(); -// helps split single-line json into chunked-by-line geojson +// module to read path +var path = require('path'); +// parallel allows for reading each admin geojson stream asynchronously +var parallel = require('async').parallel; +// since the output of `c-update-geojson-spec.sh` writes geojsons to a single line, the stream needs to be broken up into lines, otherwise it will not work +// split is a module that does just this. 
var split = require('split'); -// tmp dir with geojsons -var adminPath = `${baseDir}/tmp`; + +// directory with geojson files +var adminPath = 'data/processing/d-simplify-props/tmp' +// read in files as a list usable in the parallel function var admins = readdirSync(adminPath) -// create list of async functions to pass to parallel +// create that list of async functions to pass to parallel const adminTasks = admins.map((admin) => { return function(cb) { + // the basename, really the admin level name, of the current admin var basename = admin.split('-')[1] + // the relative path to the current admin file var adminFile = path.join(adminPath, admin) + // a read stream of admin file var adminFileStream = createReadStream(adminFile) + // piping split makes the new lines mentioned as necessary above .pipe(split()) + // parser is a transform stream that parses geojson feature collections (the form of the input geojson) .pipe(parser) .on('data', (feature) => { + // make and pass feature's properties to the makeNewProperties function that correctly transforms + // the properties to uniform spec needed to insert into the postgis tables const properties = feature.properties; + // reset the feature properties as the return value from makeNewProperties feature.properties = makeNewProperties(properties, basename) }) + // stringify the geojson to send to createWriteStream, then write it to file .pipe(stringifier) .pipe(createWriteStream(`${baseDir}/output/vietnam-${basename}-simplified.geojson`)) + // when createWriteStream is closed, fire a callback. 
.on('close', () => { cb(null, null) }) } }); @@ -58,6 +74,8 @@ function makeNewProperties (properties, admin) { return newProperties; } +// run adminTasks in parallel parallel(adminTasks, (err, res) => { + // do nothing when they are all finished if (!err) {} }); diff --git a/processing/e-insert-tables.js b/processing/e-insert-tables.js index 8de113e..c2ee251 100644 --- a/processing/e-insert-tables.js +++ b/processing/e-insert-tables.js @@ -1,55 +1,85 @@ +/** + * @file reads streaming admin geojson and 'inserts' each feature 'into' matching admin postgis table + */ + +// these modules are needed for streaming geojsons var createReadStream = require('fs').createReadStream; var createWriteStream = require('fs').createWriteStream; var readdirSync = require('fs').readdirSync; +var geojsonStream = require('geojson-stream'); +var parser = geojsonStream.parse(); +var stringifier = geojsonStream.stringify(); +// module to read path var path = require('path'); +// parallel allows for reading each admin geojson stream asynchronously var parallel = require('async').parallel; - -var baseDir = 'data/processing/d-simplify-props' +// knex creates a knex obj that links to the current environment's database var knex = require('./db/connection/.js') +// postgis is a knex extension to generate postgis statements var postgis = require('knex-postgis'); - -// streams to read and write geojsons -var geojsonStream = require('geojson-stream'); -var parser = geojsonStream.parse(); -var stringifier = geojsonStream.stringify(); -// helps split single-line json into chunked-by-line geojson +// helps split single-line json into chunked-by-line geojson as mentioned in d-simplify-props.js var split = require('split'); -// tmp dir with geojsons -var adminPath = `${baseDir}/tmp`; +// directory with geojsons +var adminPath = `data/processing/d-simplify-props/tmp`; +// array including elements with each file in that directory var admins = readdirSync(adminPath) - -var db = knex({dialect: 'postgres'}); 
-var st = postgis(db); +// st is short for spatial type. spatial type is the prefix for postgis functions that allow for spatial sql statements +// see https://postgis.net/docs/reference.html +var st = postgis(knex); // create list of async functions to pass to parallel const adminTasks = admins.map((admin) => { return function(cb) { + // base name mirrors admin name var basename = admin.split('-')[1] + // here's the path to the current admin file var adminFile = path.join(adminPath, admin) - var adminFile = path.join('./', admin); + // stream of this admin file var adminFileStream = createReadStream(adminFile) + // pipe split for the lines needed to send along to the geojson parser .pipe(split()) + // the geojson parser for parsing the feature collection .pipe(parser) .on('data', (feature) => { + // for each feature, insert it into the table using the insertIntoTable function insertIntoTable(feature, basename) }) + // fire a callback on end event .on('end', () => { cb(null, null) }) } }); +/** + * transforms feature into postgis table row and inserts it into the proper admin table + * + * @param {object} feature geojson feature + * @param {string} admin admin name + */ function insertIntoTable (feature, admin) { + // generate properties and geometry objects from feature object const properties = feature.properties; const geometry = feature.geometry; const statement = db.insert({ + // shared identifier for each row in admin table type: admin, + // numeric id for current admin unit id: properties.id, + // numeric id for current admin unit's parent (for instance a commune's parent district) + // this is helpful for future spatial analysis parent_id: properties.p_id, + // admin unit geometry geo: st.geomFromGeoJSON(geometry), + // english name of admin unit name_en: properties.en_name, + // vietnamese name of admin unit name_vn: '' - }).into(`${admin}-table`).toString(); + }) + // method that inserts the insert statement into its correct table + 
.into(`${admin}-table`).toString(); } +// run tasks in parallel parallel(adminTasks, (err, res) => { + // do nothing on result if (!err) {} });