Skip to content

Commit

Permalink
document!
Browse files Browse the repository at this point in the history
  • Loading branch information
maxgrossman committed Sep 14, 2017
1 parent c50836e commit 99f2373
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 40 deletions.
27 changes: 17 additions & 10 deletions processing/a-dissolve.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
# input filename + file
# Synopsis: creates district- and province-level admin boundaries by dissolving (merging the geometries of) features that share the same unique id.

# input name is used to build names of inputs and output files of each process
INPUT_NAME=vietnam-communes
# input for dissolve to make district and province level boundaries as well as convert the commune shapefile to geojson
INPUT=${1}/tmp/${INPUT_NAME}.shp
# copy input shapefile into tmp directory
# for districts and provinces + their uniq field

# for both the district and province, create a new geojson that dissolves features on the unique field id supplied
# on the right hand side of the semi-colon
# each entry is '<admin name>;<dissolve field>'
for ADMIN in 'district;DISTCODE02' 'province;PROCODE02'
do
# split ${ADMIN} on the semicolon into an array: [0] is the admin name, [1] is its dissolve field
ADMIN_ARRAY=(${ADMIN//;/ })
# build the output file name from the current admin name
OUTPUT=${1}/output/vietnam-${ADMIN_ARRAY[0]}.geojson
# set ${DISSOLVE_FIELD} to the current admin's dissolve field
DISSOLVE_FIELD=${ADMIN_ARRAY[1]}
# dissolve on the admin field with ogr2ogr and write the output as a geojson
# this command creates a new geojson whose features are the merged geometries that share the same ${DISSOLVE_FIELD}
# 'ST_Union' merges geometries.
# 'GROUP BY' tells gdal which geometries to merge together
ogr2ogr -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -dialect sqlite -sql $'SELECT ST_Union(geometry), * FROM "'"$INPUT_NAME"$'" GROUP BY '"$DISSOLVE_FIELD"
done
# also convert communes shp to geojson
IN_SHP=${1}/tmp/${INPUT_NAME}.shp
# name of geojson output file
OUT_GJSN=${1}/output/${INPUT_NAME}.geojson
ogr2ogr -f 'GeoJSON' "${OUT_GJSN}" "${IN_SHP}"
# since communes don't need to be dissolved, do a simple shp->geojson conversion
ogr2ogr -f 'GeoJSON' "${INPUT}" "${IN_SHP}"
9 changes: 7 additions & 2 deletions processing/b-reproject.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
# Synopsis: reproject each admin geojson from UTM to WGS84
# reproject the geojson of every admin level
for ADMIN in communes district province
do
# input file name for the current ${ADMIN}, read from the tmp directory
INPUT=${1}/tmp/vietnam-${ADMIN}.geojson
# output file name for the current ${ADMIN}, written to the output directory
OUTPUT=${1}/output/vietnam-${ADMIN}-wgs84.geojson
# reproject ${INPUT} to wgs84 with ogr2ogr
# -t_srs is the flag for reprojection
# EPSG:4326 is the WGS84 EPSG code
# http://spatialreference.org/ref/epsg/wgs-84/
ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUTPUT}" "${INPUT}"
done
14 changes: 11 additions & 3 deletions processing/c-update-geojson-spec.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
# enforce right-hand rule for polygons
# Synopsis: removes the crs object from the geojsons output by b-reproject and enforces the right-hand rule for polygon winding order.
# info about right hand rules and the new GeoJSON spec below
# for right hand, see the winding section here: https://macwright.org/2015/03/23/geojson-second-bite.html
# for the brave, here's the actual spec https://tools.ietf.org/html/rfc7946

# for each admin level, strip the crs object and rewind the polygons so the file matches the current GeoJSON spec
for ADMIN in communes district province
do
# generate the input and output file names for the current ${ADMIN}
INPUT_FILE=${1}/tmp/vietnam-${ADMIN}-wgs84.geojson
OUTPUT_FILE=${1}/output/vietnam-${ADMIN}-cleaned.geojson
# remove the crs object to match the current GeoJSON spec using sed:
# the expression deletes every line containing "crs" and every empty line.
# the command was found here:
# https://stackoverflow.com/questions/38028600/how-to-delete-a-json-object-from-json-file-by-sed-command-in-bash (see the mailer example)
# `-i .org` makes the conversion in place, so ${INPUT_FILE} itself has its crs removed.
# NOTE(review): `-i .org` with a space is the BSD/macOS sed form; GNU sed expects `-i.org` - confirm the target platform.
# the file paths are quoted so a base directory containing spaces is not word-split.
sed -i .org '/\"crs\"/ d; /^$/d' "${INPUT_FILE}"
# geojson-rewind fixes the polygon winding order, also to match the current spec; its output is saved to ${OUTPUT_FILE}
geojson-rewind "${INPUT_FILE}" > "${OUTPUT_FILE}"
done
38 changes: 28 additions & 10 deletions processing/d-simplify-props.js
Original file line number Diff line number Diff line change
@@ -1,35 +1,51 @@
/**
* @file reads streaming admin geojson and reduces properties to match the schema of the table to which it is going to be written
*/

// these modules are needed for streaming geojsons
var createReadStream = require('fs').createReadStream;
var createWriteStream = require('fs').createWriteStream;
var readdirSync = require('fs').readdirSync;
var path = require('path');
var parallel = require('async').parallel;

var baseDir = 'data/processing/d-simplify-props'

// streams to read and write geojsons
var geojsonStream = require('geojson-stream');
var parser = geojsonStream.parse();
var stringifier = geojsonStream.stringify();
// helps split single-line json into chunked-by-line geojson
// module to read path
var path = require('path');
// parallel allows for reading each admin geojson stream asynchronously
var parallel = require('async').parallel;
// since the output of `c-update-geojson-spec.sh` writes geojsons to a single line, the stream needs to be broken up into lines, otherwise it will not work
// split is a module that does just this.
var split = require('split');
// tmp dir with geojsons
var adminPath = `${baseDir}/tmp`;

// directory with geojson files
var adminPath = 'data/processing/d-simplify-props/tmp'
// read in files as a list usable in the parallel function
var admins = readdirSync(adminPath)

// create list of async functions to pass to parallel
// create that list of async functions to pass to parallel
/**
 * One task per admin geojson file, in the `function (cb)` form that `parallel` expects.
 * Each task streams its file through a geojson parser, replaces every feature's
 * properties with the result of `makeNewProperties`, and writes the features to
 * `${baseDir}/output/vietnam-<admin>-simplified.geojson`.
 *
 * `cb` is called exactly once per task: with the error if any stream in the
 * pipeline fails, otherwise with `(null, null)` once the output file is closed.
 */
const adminTasks = admins.map((admin) => {
  return function (cb) {
    // the basename, really the admin level name, is the second dash-separated token of the file name
    const basename = admin.split('-')[1];
    // the relative path to the current admin file
    const adminFile = path.join(adminPath, admin);

    // Each task gets its own parser/stringifier. Sharing one instance between
    // tasks that run in parallel would mix features from different files and
    // let the first file to finish end the stream for all the others.
    const taskParser = geojsonStream.parse();
    const taskStringifier = geojsonStream.stringify();

    // parallel() must not be called back twice, so only the first outcome is reported
    let settled = false;
    const finish = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      cb(err || null, null);
    };

    // pipe() does not forward errors, so every stage reports its own
    createReadStream(adminFile)
      .on('error', finish)
      // split breaks the single-line geojson into lines for the parser
      .pipe(split())
      .on('error', finish)
      // the parser is a transform stream that parses geojson feature collections (the form of the input geojson)
      .pipe(taskParser)
      .on('data', (feature) => {
        // replace the feature's properties with the uniform set needed to insert into the postgis tables
        feature.properties = makeNewProperties(feature.properties, basename);
      })
      .on('error', finish)
      // stringify the geojson again, then write it to file
      .pipe(taskStringifier)
      .on('error', finish)
      .pipe(createWriteStream(`${baseDir}/output/vietnam-${basename}-simplified.geojson`))
      .on('error', finish)
      // when the write stream is closed, the task is done
      .on('close', () => { finish(null); });
  };
});
Expand Down Expand Up @@ -58,6 +74,8 @@ function makeNewProperties (properties, admin) {
return newProperties;
}

// run adminTasks in parallel
parallel(adminTasks, (err) => {
  // nothing to do when they all finish; report a failure instead of silently dropping it
  if (err) {
    console.error(err);
    process.exitCode = 1;
  }
});
60 changes: 45 additions & 15 deletions processing/e-insert-tables.js
Original file line number Diff line number Diff line change
@@ -1,55 +1,85 @@
/**
* @file reads streaming admin geojson and 'inserts' each feature 'into' matching admin postgis table
*/

// these modules are needed for streaming geojsons
var createReadStream = require('fs').createReadStream;
var createWriteStream = require('fs').createWriteStream;
var readdirSync = require('fs').readdirSync;
var geojsonStream = require('geojson-stream');
var parser = geojsonStream.parse();
var stringifier = geojsonStream.stringify();
// module to read path
var path = require('path');
// parallel allows for reading each admin geojson stream asynchronously
var parallel = require('async').parallel;

var baseDir = 'data/processing/d-simplify-props'
// knex creates a knex obj that links to the current environment's database
var knex = require('./db/connection/.js')
// postgis is a knex extension to generate postgis statements
var postgis = require('knex-postgis');

// streams to read and write geojsons
var geojsonStream = require('geojson-stream');
var parser = geojsonStream.parse();
var stringifier = geojsonStream.stringify();
// helps split single-line json into chunked-by-line geojson
// helps split single-line json into chunked-by-line geojson as mentioned in d-simplify-props.js
var split = require('split');
// tmp dir with geojsons
var adminPath = `${baseDir}/tmp`;
// directory with geojsons
var adminPath = `data/processing/d-simplify-props/tmp`;
// array including elements with each file in that directory
var admins = readdirSync(adminPath)

var db = knex({dialect: 'postgres'});
var st = postgis(db);
// st is short for spatial type. spatial type is the prefix for postgis functions that allow for spatial sql statements
// see https://postgis.net/docs/reference.html
var st = postgis(knex);

// create list of async functions to pass to parallel
/**
 * One task per admin geojson file, each in the `function(cb)` form.
 * A task streams its file through the geojson parser and hands every feature to `insertIntoTable`.
 */
const adminTasks = admins.map((admin) => {
return function(cb) {
// base name mirrors the admin name: the second dash-separated token of the file name
var basename = admin.split('-')[1]
// here's the path to the current admin file
// NOTE(review): adminFile is assigned twice; the second assignment wins, so the file is resolved against './' rather than adminPath - confirm which one is intended
var adminFile = path.join(adminPath, admin)
var adminFile = path.join('./', admin);
// stream of this admin file
var adminFileStream = createReadStream(adminFile)
// pipe split for the lines needed to send along to the geojson parser
.pipe(split())
// the geojson parser for parsing the feature collection
// NOTE(review): `parser` is a single module-level instance shared by every task - confirm that is safe when the tasks run in parallel
.pipe(parser)
.on('data', (feature) => {
// for each feature, insert it into the table using the insertIntoTable function
insertIntoTable(feature, basename)
})
// fire the callback on the end event
.on('end', () => { cb(null, null) })
}
});

/**
 * transforms feature into a postgis table row and builds the insert statement for the proper admin table
 *
 * NOTE(review): the statement is only converted to a string and stored in `statement`;
 * nothing in this function executes it - confirm whether the insert is run elsewhere.
 *
 * @param {object} feature geojson feature
 * @param {string} admin admin name
 */
function insertIntoTable (feature, admin) {
// generate properties and geometry objects from feature object
const properties = feature.properties;
const geometry = feature.geometry;
const statement = db.insert({
// shared identifier for each row in admin table
type: admin,
// numeric id for current admin unit
id: properties.id,
// numeric id for current admin unit's parent (for instance a commune's parent district)
// this is helpful for future spatial analysis
parent_id: properties.p_id,
// admin unit geometry
geo: st.geomFromGeoJSON(geometry),
// english name of admin unit
name_en: properties.en_name,
// vietnamese name of admin unit
name_vn: ''
}).into(`${admin}-table`).toString();
})
// method that names the table the insert statement targets
.into(`${admin}-table`).toString();
}

// run tasks in parallel
parallel(adminTasks, (err) => {
  // nothing to do with the results; report a failure instead of silently dropping it
  if (err) {
    console.error(err);
    process.exitCode = 1;
  }
});

0 comments on commit 99f2373

Please sign in to comment.