Feature/process command #3
Changes from 4 commits
@@ -0,0 +1,3 @@
node_modules
**/*/.DS_store
data
@@ -0,0 +1,67 @@
# Synopsis: links a set of I/O geoprocessing scripts that transform a commune-level shapefile of Vietnam admin areas
########### into three postgis tables at commune, district, and province levels.

# output directory that holds the final output of the linked processes
OUT_DIR=./data/output
# the base processing directory that includes subdirectories holding I/O data for each process
PROCESSING_BASE_DIR=./data/processing
# a special directory used to hand off data between each process
HNDF_DIR=./data/handoff

# delete handoff or process directories from previous runs that may have errored
rm -rf ${HNDF_DIR}
rm -rf ${PROCESSING_BASE_DIR}

# make handoff and process directories for the current pipeline run
mkdir ${PROCESSING_BASE_DIR}
mkdir ${HNDF_DIR}

# make directories in ${PROCESSING_BASE_DIR} for each process's I/O; the process scripts live in ./processing
for FILE in ./processing/*
do
    # make process file executable
    chmod +x ${FILE}
    # get the base filename from its path to generate the process's ${PROCESS_DIR} in ${PROCESSING_BASE_DIR}
    FILEBASE=${FILE##*/}
    FILESPLIT=(${FILEBASE//./ })
    FILENAME=${FILESPLIT[0]}
    PROCESS_DIR=${PROCESSING_BASE_DIR}/${FILENAME}
    # make process dir
    mkdir ${PROCESS_DIR}
    # in ${PROCESS_DIR}, generate the input, tmp, and output ${PROCESS_SUBDIR}s needed to handle process-specific I/O
    for SUBDIR in input tmp output
    do
        PROCESS_SUBDIR=${PROCESS_DIR}/${SUBDIR}
        mkdir ${PROCESS_SUBDIR}
        # if the current ${PROCESS_SUBDIR} is input, and the process is the first dissolve process, copy the pipeline's only input, the commune shapefile, into it
        if [[ $SUBDIR == *"input"* ]]
        then
            if [[ $PROCESS_SUBDIR == *"dissolve"* ]]
            then
                cp -R ./data/input/. ${PROCESS_SUBDIR}/
            fi
        fi
    done
    # for all processes except the first dissolve process, first copy the data inside ${HNDF_DIR} into the process's input dir, then delete that process's content from handoff.
    # the removal makes sure only the proper files exist there, since some process scripts read in all of input rather than files matching a specific nomenclature
    if [[ $PROCESS_DIR != *"dissolve"* ]]
    then
        cp -R ${HNDF_DIR}/. ${PROCESS_DIR}/input/
        rm -f ${HNDF_DIR}/*
    fi
    # copy input data to the process's tmp dir so that, on any pipeline error, the original input can still be inspected
    cp -R ${PROCESS_DIR}/input/. ${PROCESS_DIR}/tmp/
    # run the process with the command specific to whether it is a shell process or a javascript process
    echo --- running ${FILENAME} ---
    if [[ $FILE == *".sh"* ]]
    then
        ${FILE} ${PROCESS_DIR}
    else
        node ${FILE} ${PROCESS_DIR}
    fi
    # copy output contents to the handoff directory for the next process to grab
    cp -R ${PROCESS_DIR}/output/. ${HNDF_DIR}/
done
# copy the final handoff contents into ${OUT_DIR} before cleanup; without this step ${OUT_DIR} goes unused and the pipeline's output is deleted
mkdir -p ${OUT_DIR}
cp -R ${HNDF_DIR}/. ${OUT_DIR}/
# clean up
rm -rf ${HNDF_DIR}
rm -rf ${PROCESSING_BASE_DIR}
@@ -0,0 +1,21 @@
{
  "name": "openroads-vn-boundaries",
  "version": "0.0.1",
  "description": "processing scripts and data for OpenRoads Vietnam admin boundary data",
  "main": "index.js",
  "repository": "https://github.com/orma/openroads-vn-boundaries.git",
  "author": "maxgrossman <[email protected]>",
  "license": "MIT",
  "scripts": {
    "start": "chmod +x ./admin-tables-pipe.sh"
  },
  "dependencies": {
    "async": "^2.5.0",
    "geojson-rewind": "^0.2.0",
    "geojson-stream": "^0.0.1",
    "iconv-lite": "^0.4.19",
    "knex": "^0.13.0",
    "knex-postgis": "^0.2.2",
    "split": "^1.0.1"
  }
}
@@ -1,19 +1,20 @@
# input, output files
INPUT=./data/tmp/vietnam-communes.shp
# input filename + file
INPUT_NAME=vietnam-communes

INPUT=${1}/tmp/${INPUT_NAME}.shp
# copy input shapefile into tmp directory
cp ./data/input/* ./data/tmp

# for districts and provinces + their unique field
for ADMIN in 'district;DISTCODE02' 'province;PROCODE02'
do
    # split ADMIN into an array including the admin name and its field
    ADMIN_ARRAY=(${ADMIN//;/ })
    # use admin name to generate output file name
    OUTPUT=./data/output/vietnam-${ADMIN_ARRAY[0]}.geojson
    OUTPUT=${1}/output/vietnam-${ADMIN_ARRAY[0]}.geojson
    # set DISSOLVE_FIELD to admin field
    DISSOLVE_FIELD=${ADMIN_ARRAY[1]}
    # dissolve on admin field and write to file
    ogr2ogr -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -dialect sqlite -sql $'SELECT ST_Union(geometry), * FROM "'"$INPUT_NAME"$'" GROUP BY '"$DISSOLVE_FIELD"
done
# also convert communes shp to geojson
IN_SHP=${1}/tmp/${INPUT_NAME}.shp
OUT_GJSN=${1}/output/${INPUT_NAME}.geojson
ogr2ogr -f 'GeoJSON' "${OUT_GJSN}" "${IN_SHP}"
@@ -0,0 +1,8 @@
for ADMIN in communes district province
do
    # use admin name to generate output and input file names
    INPUT=${1}/tmp/vietnam-${ADMIN}.geojson
    OUTPUT=${1}/output/vietnam-${ADMIN}-wgs84.geojson
    # reproject to wgs84
    ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUTPUT}" "${INPUT}"
done
@@ -0,0 +1,10 @@
# enforce right-hand rule for polygons
for ADMIN in communes district province
do
    INPUT_FILE=${1}/tmp/vietnam-${ADMIN}-wgs84.geojson
    OUTPUT_FILE=${1}/output/vietnam-${ADMIN}-cleaned.geojson
    # remove crs object to match the current GeoJSON spec
    sed -i .org '/\"crs\"/ d; /^$/d' ${INPUT_FILE}
    # enforce right-hand-rule winding, also to match the current spec
    geojson-rewind ${INPUT_FILE} > ${OUTPUT_FILE}
done
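
The same cleanup step could be sketched in Node using the geojson-rewind package already listed in package.json. This is only an illustration, not part of the PR; the file paths are hypothetical stand-ins for the process's ${1}/tmp and ${1}/output dirs.

var fs = require('fs');
var rewind = require('geojson-rewind');

// hypothetical paths standing in for the process's tmp and output dirs
var geojson = JSON.parse(fs.readFileSync('tmp/vietnam-district-wgs84.geojson', 'utf8'));
// drop the legacy crs member, as the sed line above does
delete geojson.crs;
// rewind(geojson, false) enforces counter-clockwise outer rings (the right-hand rule)
fs.writeFileSync('output/vietnam-district-cleaned.geojson', JSON.stringify(rewind(geojson, false)));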
@@ -0,0 +1,63 @@
var createReadStream = require('fs').createReadStream;
var createWriteStream = require('fs').createWriteStream;
var readdirSync = require('fs').readdirSync;
var path = require('path');
var parallel = require('async').parallel;

// the pipeline passes this process's directory as the first argument;
// fall back to the hardcoded path when run standalone
var baseDir = process.argv[2] || 'data/processing/d-simplify-props';

// streams to read and write geojsons
var geojsonStream = require('geojson-stream');
// helps split single-line json into chunked-by-line geojson
var split = require('split');
// tmp dir with geojsons
var adminPath = `${baseDir}/tmp`;
var admins = readdirSync(adminPath);

// create list of async functions to pass to parallel
const adminTasks = admins.map((admin) => {
  return function(cb) {
    var basename = admin.split('-')[1];
    var adminFile = path.join(adminPath, admin);
    createReadStream(adminFile)
      .pipe(split())
      // create fresh parse/stringify streams per file so parallel tasks don't share stream state
      .pipe(geojsonStream.parse())
      .on('data', (feature) => {
        const properties = feature.properties;
        feature.properties = makeNewProperties(properties, basename);
      })
      .pipe(geojsonStream.stringify())
      .pipe(createWriteStream(`${baseDir}/output/vietnam-${basename}-simplified.geojson`))
      .on('close', () => { cb(null, null); });
  }
});

/**
 * simplifies input properties to the spec needed to make admin postgis tables
 *
 * @param {object} properties original properties from streaming geojson
 * @param {string} admin admin unit name, like 'commune' or 'district'
 * @return {object} newProperties simplified properties generated from properties
 */
function makeNewProperties (properties, admin) {
  const newProperties = {};
  if (RegExp(/commune/).test(admin)) {
    newProperties.en_name = properties.EN_name;
    newProperties.id = properties.COMCODE02;
    newProperties.p_id = properties.DISTCODE02;
  } else if (RegExp(/district/).test(admin)) {
    newProperties.en_name = properties.D_EName;
    newProperties.id = properties.DISTCODE02;
    newProperties.p_id = properties.PROCODE02;
  } else {
    newProperties.en_name = properties.P_EName;
    newProperties.id = properties.PROCODE02;
  }
  return newProperties;
}

parallel(adminTasks, (err, res) => {
  if (err) console.error(err);
});
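
For a concrete sense of what makeNewProperties does, here is a made-up district feature's properties before and after; the names and codes are invented for illustration only.

// invented input properties for a district feature
var districtProps = { D_EName: 'Hoan Kiem', DISTCODE02: '00028', PROCODE02: '101', AREA_02: 5.29 };
// everything but the simplified name/id fields is dropped
makeNewProperties(districtProps, 'district');
// => { en_name: 'Hoan Kiem', id: '00028', p_id: '101' }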
@@ -0,0 +1,22 @@
'use strict';
var assert = require('assert');

// set the db urls based on the environment
var DEFAULT_ENVIRONMENT = 'development';
var environment = process.env.MACROCOSM_ENV || DEFAULT_ENVIRONMENT;
var connection = process.env.DATABASE_URL || require('./local').connection[environment];

assert.ok(connection, 'Connection is undefined; check DATABASE_URL or local.js');

// connect knex to the current env's db
var knex = require('knex')({
  client: 'pg',
  connection: connection,
  debug: false,
  pool: {
    min: 2,
    max: 10
  }
});

module.exports = knex;
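
A minimal usage sketch of the exported instance; the admin_boundaries table name is an assumption for illustration, since the actual tables are still under discussion in this PR.

var knex = require('./db/connection');

// select a few rows from a hypothetical admin table to confirm the connection works
knex('admin_boundaries')
  .select('id', 'name_en')
  .limit(5)
  .then(function (rows) { console.log(rows); })
  .catch(function (err) { console.error(err); });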
@@ -0,0 +1,55 @@
var createReadStream = require('fs').createReadStream;
var readdirSync = require('fs').readdirSync;
var path = require('path');
var parallel = require('async').parallel;

// the pipeline passes this process's directory as the first argument;
// fall back to the hardcoded path when run standalone
var baseDir = process.argv[2] || 'data/processing/d-simplify-props';
// the configured knex instance exported by db/connection
var knex = require('./db/connection');
var postgis = require('knex-postgis');

// stream to read geojsons
var geojsonStream = require('geojson-stream');
// helps split single-line json into chunked-by-line geojson
var split = require('split');
// tmp dir with geojsons
var adminPath = `${baseDir}/tmp`;
var admins = readdirSync(adminPath);

var db = knex;
var st = postgis(db);

// create list of async functions to pass to parallel
const adminTasks = admins.map((admin) => {
  return function(cb) {
    var basename = admin.split('-')[1];
    var adminFile = path.join(adminPath, admin);
    createReadStream(adminFile)
      .pipe(split())
      // create a fresh parse stream per file so parallel tasks don't share stream state
      .pipe(geojsonStream.parse())
      .on('data', (feature) => {
        insertIntoTable(feature, basename);
      })
      .on('end', () => { cb(null, null); });
  }
});

function insertIntoTable (feature, admin) {
  const properties = feature.properties;
  const geometry = feature.geometry;
  const statement = db.insert({
    type: admin,
    id: properties.id,
    parent_id: properties.p_id,
    geo: st.geomFromGeoJSON(geometry),
    name_en: properties.en_name,
    name_vn: ''
  }).into(`${admin}-table`).toString();

Review comment: Won't all levels of boundaries live in the same admin table? That's how I have it set up, and how we did it for ORMA Philippines. With one table, it's easier to query, and self-joins make it as versatile as multiple tables (eg,

Reply: For sure they can all live in the same table / they need to if that is how you have things set up! I'll make the changes to that script so we're doing the right insert.

  // the statement is only built, not yet executed; the target table layout
  // is being reworked per the review above (single admin table)
  return statement;
}

parallel(adminTasks, (err, res) => {
  if (err) console.error(err);
});
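
To illustrate the single-table approach from the review above, here is a hypothetical self-join with knex; admin_boundaries and its columns are assumptions for illustration, not this PR's schema.

var knex = require('./db/connection');

// fetch each district together with its parent province's name by
// self-joining the table on parent_id -> id
knex('admin_boundaries as child')
  .join('admin_boundaries as parent', 'child.parent_id', 'parent.id')
  .where('child.type', 'district')
  .select('child.id', 'child.name_en', 'parent.name_en as province_name')
  .then(function (rows) { console.log(rows); });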
This file was deleted.

Review comment: Small thing: remove .DS_Stores that have already been committed when this line came in.