Skip to content

Commit

Permalink
include fill holes and run pipeline
Browse files Browse the repository at this point in the history
pipeline is now running with the fill holes step.
  • Loading branch information
maxgrossman committed Sep 19, 2017
1 parent 80a2725 commit 28a74b0
Show file tree
Hide file tree
Showing 20 changed files with 330 additions and 196 deletions.
Binary file removed .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion db/local/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module.exports = {
connection: {
'development': '',
'development': 'postgres://[email protected]:5433/openroads',
'staging': '',
'production': ''
}
Expand Down
2 changes: 1 addition & 1 deletion docker/clean-geometries/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ COPY ./vietnam-communes.shp /workspace/vietnam-communes.shp
COPY ./vietnam-communes.shx /workspace/vietnam-communes.shx
COPY ./run.sh /workspace/run.sh
# on entry into container, run the run.sh script
ENTRYPOINT ["/bin/bash", "/workspace/run.sh"]
ENTRYPOINT ["/bin/bash", "/workspace/main.sh"]
5 changes: 3 additions & 2 deletions docker/clean-geometries/clean-geom.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# the following qgis modules import and order reference the following
# https://github.com/nuest/docker-qgis-model/blob/master/workspace/example/model.py#L20
from qgis.core import *
import qgis.utilsinput_communes,
import qgis.utils
# to use processing script a qgis app needs to be initialized
app = QgsApplication([], True)
QgsApplication.setPrefixPath('/usr', True)
Expand All @@ -19,5 +19,6 @@
# set path to inputs and outputs
input_communes = os.path.join(os.getcwd(), sys.argv[1])
output_communes = os.path.join(os.getcwd(), sys.argv[2] + '.shp')
error_communes = os.path.join(os.getcwd(), sys.argv[2] + '-error.shp')
# clean the geometries
processing.runalg('grass:v.clean',input_communes,0, 0.1,'205952.54923,985984.624375,929508.401261,2586975.43865',-1, 0.0001,output_communes)
processing.runalg('grass:v.clean',input_communes,0, 0.1,'205952.54923,985984.624375,929508.401261,2586975.43865',-1, 0.0001,output_communes,error_communes)
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ cd /workspace
# run qgis process in python while keeping qgis 'headless', or put otherwise access and use qgis processing modules
# without running the qgis gui
# clean the geometry for the initial communes file
xvfb-run -e ${XVFB_LOGFILE} python clean-geom.py vietnam-communes.shp vietnam-communes-clean-geom
xvfb-run -a python clean-geom.py vietnam-communes.shp vietnam-communes-clean-geom
5 changes: 2 additions & 3 deletions docker/delete-holes/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
FROM nuest/qgis-model:xenial-multimodel
COPY ./delete-holes.py /workspace/delete-holes.py
COPY ./run.sh /workspace/run.sh
COPY ./vietnam-communes.geojson /workspace/vietnam-communes.geojson
COPY ./main.sh /workspace/main.sh
COPY ./vietnam-district.geojson /workspace/vietnam-district.geojson
COPY ./vietnam-province.geojson /workspace/vietnam-province.geojson
# on entry into container, run the main.sh script
ENTRYPOINT ["/bin/bash", "/workspace/run.sh"]
ENTRYPOINT ["/bin/bash", "/workspace/main.sh"]
6 changes: 6 additions & 0 deletions docker/delete-holes/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# run qgis process in python while keeping qgis 'headless', or put otherwise access and use qgis processing modules
# without running the qgis gui
cd /workspace
# remove holes for all three admin files
xvfb-run -a python delete-holes.py vietnam-province.geojson vietnam-province-filled-holes
xvfb-run -a python delete-holes.py vietnam-district.geojson vietnam-district-filled-holes
7 changes: 0 additions & 7 deletions docker/delete-holes/run.sh

This file was deleted.

97 changes: 97 additions & 0 deletions js/insert-into-table.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/**
* @file reads streaming admin geojson and 'inserts' each feature 'into' matching admin postgis table
*/

// these modules are needed for streaming geojsons
var createReadStream = require('fs').createReadStream;
var createWriteStream = require('fs').createWriteStream;
var readdirSync = require('fs').readdirSync;
var geojsonStream = require('geojson-stream');
var parser = geojsonStream.parse();
var stringifier = geojsonStream.stringify();
// module to read path
var path = require('path');
// parallel allows for reading each admin geojson stream asynchronously
var parallel = require('async').parallel;
// knex creates a knex obj that links to the current environment's database
// var knex = require('./db/connection/.js')
var db = require('../db/connection');
// postgis is a knex extension to generate postgis statements
var postgis = require('knex-postgis');
// helps split single-line json into chunked-by-line geojson as mentioned in simplify-props.js
var split = require('split');
// directory with geojsons (first cli argument); the files to insert are read from its tmp folder
var baseDir = process.argv[2];
var adminInPath = `${baseDir}/tmp`
// st is short for spatial type. spatial type is the prefix for postgis functions that allow for spatial sql statements
// see https://postgis.net/docs/reference.html
var st = postgis(db);

// return current admin: the first file in the tmp folder whose name matches the pattern given as second cli argument
var admin = readdirSync(adminInPath).find((admin) => new RegExp(process.argv[3]).test(admin));
// stop with a readable message instead of a TypeError on `admin.split` when nothing matched
if (!admin) {
  throw new Error(`no file matching '${process.argv[3]}' found in ${adminInPath}`);
}
// base name mirrors admin name; it is the second dash separated part of the file name
var basename = admin.split('-')[1]
// here's the path to the current admin file
var adminFile = path.join(adminInPath, admin)
// every insert that has been started; the connection pool has to stay open until all of them have settled
var pendingInserts = [];
// stream of this admin file
var adminFileStream = createReadStream(adminFile)
  // pipe split for the lines needed to send along to the geojson parser
  .pipe(split())
  // the geojson parser for parsing the feature collection
  .pipe(parser)
  .on('data', (feature) => {
    // for each feature, insert it into the table using the insertIntoTable function
    if (feature.properties) {
      console.log(feature.properties);
      pendingInserts.push(
        insertIntoTable(feature, basename, st, db)
          // report a failed insert and flag the run as failed, but let the remaining inserts finish
          .catch((e) => {
            console.error(e);
            process.exitCode = 1;
          })
      );
    }
  })
  .on('end', () => {
    // the stream ending only means every insert has been started, not finished;
    // wait for all of them before closing the pool
    Promise.all(pendingInserts).then(() => db.destroy());
  })

/**
 * transforms feature into a row and inserts it into the admin_boundaries table inside its own transaction
 *
 * @param {object} feature geojson feature; its properties and geometry are read but never modified
 * @param {string} admin admin name; 'communes' is stored as 'commune', any other value is stored as given
 * @param {object} st spatial type object (generated by knex postgis extension) that allows for making st/postgis statements
 * @param {object} db knex object for connecting to the database
 * @return {Promise} the promise returned by db.transaction
 */
function insertIntoTable (feature, admin, st, db) {
  // generate properties and geometry objects from feature object
  const properties = feature.properties;
  const geometry = feature.geometry;
  // value for the type column; kept in a local so the admin argument is not reassigned
  const type = admin === 'communes' ? 'commune' : admin;
  // placeholder used when the feature has no english name; kept in a local so the caller's feature is not changed
  const nameEn = properties.en_name || '...';
  return db.transaction((t) => {
    // the insert promise is returned to knex, which commits when it resolves and rolls back when it rejects,
    // so no explicit t.commit / t.rollback calls are made here
    return db('admin_boundaries')
      .transacting(t)
      .insert({
        // shared identifier for each row in admin table
        type: type,
        // numeric id for current admin unit
        id: properties.id,
        // numeric id for current admin unit's parent (for instance a commune's parent district)
        // this is helpful for future spatial analysis
        parent_id: properties.p_id,
        // admin unit geometry
        geom: st.geomFromGeoJSON(geometry),
        // english name of admin unit
        name_en: nameEn,
        // vietnamese name of admin unit; always written as an empty string here
        name_vn: ''
      });
  });
}
106 changes: 106 additions & 0 deletions js/simplify-props.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/**
* @file reads streaming admin geojson and reduces properties to match the schema of the table to which it is going to be written
*/

// these modules are needed for streaming geojsons
var createReadStream = require('fs').createReadStream;
var createWriteStream = require('fs').createWriteStream;
var readdirSync = require('fs').readdirSync;
var geojsonStream = require('geojson-stream');
var parser = geojsonStream.parse();
var stringifier = geojsonStream.stringify();
// module to read path
var path = require('path');
// parallel allows for reading each admin geojson stream asynchronously
var parallel = require('async').parallel;
// since the output of `c-update-geojson-spec.sh` writes geojsons to a single line, the stream needs to be broken up into lines, otherwise it will not work
// split is a module that does just this.
var split = require('split');
// root folder passed as the first cli argument; input is read from its tmp folder, output goes to its output folder
const baseDir = process.argv[2];
const adminInPath = `${baseDir}/tmp`;
const adminOutPath = `${baseDir}/output`;
// true when a file name matches the pattern passed as the second cli argument
const matchesRequestedAdmin = (fileName) => new RegExp(process.argv[3]).test(fileName);
// the first matching file in the input folder is the admin file to simplify
const admin = readdirSync(adminInPath).find(matchesRequestedAdmin);
writeSimplifiedProps(admin);

/**
 * reduces a feature's original properties to the en_name / id / p_id spec used for the admin postgis tables
 *
 * @param {object} properties original properties from streaming geojson
 * @param {string} admin admin unit name, like 'commune', 'district'
 * @return {object} newProperties simplified properties; en_name is always set (after cleanName),
 * id is set for known admin levels and p_id only for communes and districts
 */
function makeNewProperties (properties, admin) {
  // which source field feeds which simplified field, per admin level; the first matching level wins
  const fieldMaps = [
    { level: /commune/, name: 'EN_name', id: 'COMCODE02', parent: 'DISTCODE02' },
    { level: /district/, name: 'D_EName', id: 'DISTCODE02', parent: 'PROCODE02' },
    { level: /province/, name: 'P_EName', id: 'PROCODE02' }
  ];
  const fields = fieldMaps.find((candidate) => candidate.level.test(admin));
  const newProperties = {};
  if (fields) {
    newProperties.en_name = properties[fields.name];
    newProperties.id = properties[fields.id];
    // provinces have no parent unit, so they get no p_id
    if (fields.parent) {
      newProperties.p_id = properties[fields.parent];
    }
  }
  newProperties.en_name = cleanName(newProperties.en_name, admin);
  return newProperties;
}

/**
 * reads in raw geojson and writes out simplified geojson for provided admin level
 *
 * @param {string} adminPath file name, inside adminInPath, of the admin geojson to simplify;
 * the second dash separated part of the name is used as the admin level
 * @throws {Error} when no file name is provided
 */
function writeSimplifiedProps(adminPath) {
  // stop with a readable message instead of a TypeError on `split` when no file was found
  if (!adminPath) {
    throw new Error(`no admin geojson found in ${adminInPath} to simplify`);
  }
  // the basename, really the admin level name, of the current admin
  var basename = adminPath.split('-')[1];
  // the relative path to the current admin file
  var adminInFile = path.join(adminInPath, adminPath)
  // a read stream of admin file
  createReadStream(adminInFile)
    // piping split makes the new lines mentioned to be necessary above
    .pipe(split())
    // parser is a transform stream that parses geojson feature collections (the form of the input geojson)
    .pipe(parser)
    // this listener is registered before the pipe to the stringifier below, so each feature's
    // properties are replaced before the feature is handed on
    .on('data', (feature) => {
      // reset the feature properties as the return from makeNewProperties, which transforms
      // them to the uniform spec needed to insert into the postgis tables
      feature.properties = makeNewProperties(feature.properties, basename)
    })
    // stringify the geojson to send to createWriteStream, then write it to file
    .pipe(stringifier)
    .pipe(createWriteStream(`${adminOutPath}/vietnam-${basename}-simplified.geojson`))
}

/**
 * returns cleaned version of place name
 *
 * only the first rule (in the order listed below) whose marker occurs in the name is applied,
 * and only to the first occurrence of that marker. a name that is a non-zero number after
 * cleaning is prefixed with the admin name, e.g. 'P. 1' for 'commune' becomes 'commune 1'.
 *
 * @param {string} name admin unit name
 * @param {string} admin admin unit type, used as prefix for purely numeric names
 * @return {string} cleaned admin unit name; a falsy name is returned unchanged
 */
function cleanName(name, admin) {
  let cleaned = name;
  if (name) {
    // [marker, replacement] pairs in priority order. markers are matched literally, so the test
    // that picks a rule always agrees with the literal replace that follows it
    const rules = [
      ['X. ', ''],
      ['P. ', ''],
      ['Tt. ', ''],
      [' D.', ''],
      ['?', 'ỉ']
    ];
    const rule = rules.find((candidate) => name.includes(candidate[0]));
    if (rule) {
      cleaned = name.replace(rule[0], rule[1]);
    }
    // a name that is only a number says nothing on its own, so put the admin name in front of it
    if (Boolean(Number(cleaned))) {
      cleaned = `${admin} ${cleaned}`;
    }
  }
  return cleaned;
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"iconv-lite": "^0.4.19",
"knex": "^0.13.0",
"knex-postgis": "^0.2.2",
"pg": "^7.3.0",
"split": "^1.0.1"
}
}
8 changes: 2 additions & 6 deletions processing/b-dissolve.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ INPUT=${1}/tmp/${INPUT_NAME}.shp

# for both the district and province, create a new geojson that dissolves features on the unique field id supplied
# on the right hand side of the semi-colon
for ADMIN in 'district;DISTCODE02' 'province;PROCODE02'
for ADMIN in 'communes;COMCODE02' 'district;DISTCODE02' 'province;PROCODE02'
do
# split ${ADMIN} string on the semi-colon to grab the admin name and field id
ADMIN_ARRAY=(${ADMIN//;/ })
Expand All @@ -24,8 +24,4 @@ do
# http://spatialreference.org/ref/epsg/wgs-84/
ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -dialect sqlite -sql $'SELECT ST_Union(geometry), * FROM "'"$INPUT_NAME"$'" GROUP BY '"$DISSOLVE_FIELD"
done
# name of geojson output file
OUT_GJSN=${1}/output/${INPUT_NAME}.geojson
# since communes don't need to be dissolved, do a simple shp->geojson conversion
# make sure also to reproject
ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUT_GJSN}" "${INPUT}"
# ç
5 changes: 3 additions & 2 deletions processing/c-delete-holes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ docker run -it qgis_headless
# get admin areas from container add copy them over to the output folder of the process
# using the docker cp command, copying from the most recently built container to the output folder
# `docker ps --latest -q` grabs the most recent container
docker cp `docker ps --latest -q`:workspace/vietnam-communes-filled-holes.geojson ${1}/output/vietnam-communes-filled-holes.geojson
docker cp `docker ps --latest -q`:workspace/vietnam-district-filled-holes.geojson ${1}/output/vietnam-district-filled-holes.geojson
docker cp `docker ps --latest -q`:workspace/vietnam-province-filled-holes.geojson ${1}/output/vietnam-province-filled-holes.geojson
docker cp `docker ps --latest -q`:workspace/vietnam-district-filled-holes.geojson ${1}/output/vietnam-district-filled-holes.geojson
# communes do not need filled holes, so they are copied directly from the tmp folder to the output folder
cp ${1}/tmp/vietnam-communes.geojson ${1}/output/vietnam-communes-filled-holes.geojson
# clean up the docker directory
rm -f ./docker/delete-holes/vietnam*
2 changes: 1 addition & 1 deletion processing/d-update-geojson-spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
for ADMIN in communes district province
do
# generate unique input and output files as it has been done in previous examples
INPUT_FILE=${1}/tmp/vietnam-${ADMIN}-wgs84.geojson
INPUT_FILE=${1}/tmp/vietnam-${ADMIN}-filled-holes.geojson
OUTPUT_FILE=${1}/output/vietnam-${ADMIN}-cleaned.geojson
# remove crs object to match current GeoJSON spec using sed.
# the below command was found in following place
Expand Down
81 changes: 0 additions & 81 deletions processing/e-simplify-props.js

This file was deleted.

Loading

0 comments on commit 28a74b0

Please sign in to comment.