Feature/process command #3

Merged: 21 commits, Nov 2, 2017
Changes from 4 commits
Binary file added .DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
node_modules
**/.DS_Store

Review comment: Small thing: remove .DS_Stores that have already been committed when this line came in.
data
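
A minimal sketch of the cleanup the comment asks for, assuming the stray .DS_Store files are already tracked by git:

# untrack every committed .DS_Store so the new ignore rule can take effect
find . -name .DS_Store -print0 | xargs -0 git rm --cached --ignore-unmatch
git commit -m "remove committed .DS_Store files"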
67 changes: 67 additions & 0 deletions admin-tables-pipe.sh
@@ -0,0 +1,67 @@
# Synopsis: links a set of I/O geoprocessing scripts that transform a commune-level shapefile of Vietnam admin areas
########### into three postgis tables at commune, district, and province levels.

# output directory that holds the final output of the linked processes
OUT_DIR=./data/output
# the base processing directory, containing subdirectories that hold I/O data for each process
PROCESSING_BASE_DIR=./data/processing
# a special directory used to hand off data between processes
HNDF_DIR=./data/handoff

# delete handoff or process directories from previous runs that may have errored.
rm -rf ${HNDF_DIR}
rm -rf ${PROCESSING_BASE_DIR}

# make handoff and process directories for current pipeline run
mkdir ${PROCESSING_BASE_DIR}
mkdir ${HNDF_DIR}

# make directories in ${PROCESSING_BASE_DIR} for each process's I/O; the process scripts live in ./processing
for FILE in ./processing/*
do
# make the process file executable
chmod +x ${FILE}
# get the base filename from its path to generate the process's ${PROCESS_DIR} in ${PROCESSING_BASE_DIR}
FILEBASE=${FILE##*/}
FILESPLIT=(${FILEBASE//./ })
FILENAME=${FILESPLIT[0]}
PROCESS_DIR=${PROCESSING_BASE_DIR}/${FILENAME}
# make process dir
mkdir ${PROCESS_DIR}
# in ${PROCESS_DIR}, generate the input, tmp, and output ${PROCESS_SUBDIR}s needed for process-specific I/O
for SUBDIR in input tmp output
do
PROCESS_SUBDIR=${PROCESS_DIR}/${SUBDIR}
mkdir ${PROCESS_SUBDIR}
# if the current ${PROCESS_SUBDIR} is input, and the process is the first dissolve process, copy the pipeline's only input, the commune shapefile, into it
if [[ $SUBDIR == *"input"* ]]
then
if [[ $PROCESS_SUBDIR == *"dissolve"* ]]
then
cp -R ./data/input/. ${PROCESS_SUBDIR}/
fi
fi
done
# for all processes except the first dissolve process, first copy the data inside ${HNDF_DIR} into the process's input dir, then delete that content from handoff
# the removal makes sure only the proper files exist there, since some process scripts read everything in input rather than files matching a specific naming pattern
if [[ $PROCESS_DIR != *"dissolve"* ]]
then
cp -R ${HNDF_DIR}/. ${PROCESS_DIR}/input/
rm -f ${HNDF_DIR}/*
fi
# copy input data to the process's tmp dir so that if a process errors, the original input can still be inspected.
cp -R ${PROCESS_DIR}/input/. ${PROCESS_DIR}/tmp/
# run the process with the command appropriate to whether it is a shell or JavaScript process
echo --- running ${FILENAME} ---
if [[ $FILE == *".sh"* ]]
then
${FILE} ${PROCESS_DIR}
else
node ${FILE} ${PROCESS_DIR}
fi
# copy output contents to handoff directory for the next process to grab
cp -R ${PROCESS_DIR}/output/. ${HNDF_DIR}/
done
# clean up
rm -rf ${HNDF_DIR}
rm -rf ${PROCESSING_BASE_DIR}
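
For reference, every script in ./processing is expected to honor the same contract: it receives its processing directory as $1, reads from $1/tmp, and writes to $1/output for the next handoff. A sketch of a hypothetical new process, ./processing/f-example.sh (the name and copy-through body are illustrative only):

for FILE in ${1}/tmp/*.geojson
do
# transform each handed-off geojson; this sketch just copies it through unchanged
cp ${FILE} ${1}/output/$(basename ${FILE})
done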
21 changes: 21 additions & 0 deletions package.json
@@ -0,0 +1,21 @@
{
"name": "openroads-vn-boundaries",
"version": "0.0.1",
"description": "processing scripts and data for OpenRoads Vietnam admin boundary data",
"main": "index.js",
"repository": "https://github.com/orma/openroads-vn-boundaries.git",
"author": "maxgrossman <[email protected]>",
"license": "MIT",
"scripts": {
"start": "chmod +x ./admin-tables-pipe.sh"
},
"dependencies": {
"async": "^2.5.0",
"geojson-rewind": "^0.2.0",
"geojson-stream": "^0.0.1",
"iconv-lite": "^0.4.19",
"knex": "^0.13.0",
"knex-postgis": "^0.2.2",
"split": "^1.0.1"
}
}
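
A sketch of how this is presumably run from the repo root (note that npm start only marks the pipeline script executable; it does not run it):

npm install            # install the javascript dependencies
npm start              # chmod +x ./admin-tables-pipe.sh, per the start script above
./admin-tables-pipe.sh # run the pipeline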
13 changes: 7 additions & 6 deletions processing/dissolve.sh → processing/a-dissolve.sh
100644 → 100755
@@ -1,19 +1,20 @@
-# input, output files
-INPUT=./data/tmp/vietnam-communes.shp
+# input filename + file
+INPUT_NAME=vietnam-communes

+INPUT=${1}/tmp/${INPUT_NAME}.shp
-# copy input shapefile into tmp directory
-cp ./data/input/* ./data/tmp

# for districts and provinces + their unique field
for ADMIN in 'district;DISTCODE02' 'province;PROCODE02'
do
# split ADMIN into an array containing the admin name and its field
ADMIN_ARRAY=(${ADMIN//;/ })
# use the admin name to generate the output file name
-OUTPUT=./data/output/vietnam-${ADMIN_ARRAY[0]}.geojson
+OUTPUT=${1}/output/vietnam-${ADMIN_ARRAY[0]}.geojson
# set DISSOLVE_FIELD to the admin field
DISSOLVE_FIELD=${ADMIN_ARRAY[1]}
# dissolve on the admin field and write to file
ogr2ogr -f 'GeoJSON' "${OUTPUT}" "${INPUT}" -dialect sqlite -sql $'SELECT ST_Union(geometry), * FROM "'"$INPUT_NAME"$'" GROUP BY '"$DISSOLVE_FIELD"
done
+# also convert the communes shp to geojson
+IN_SHP=${1}/tmp/${INPUT_NAME}.shp
+OUT_GJSN=${1}/output/${INPUT_NAME}.geojson
+ogr2ogr -f 'GeoJSON' "${OUT_GJSN}" "${IN_SHP}"
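
The quote-spliced -sql argument above is dense; a sketch of the equivalent call for the district level, with the SQL held in a plain variable (file paths shortened for illustration):

SQL='SELECT ST_Union(geometry), * FROM "vietnam-communes" GROUP BY DISTCODE02'
ogr2ogr -f 'GeoJSON' vietnam-district.geojson vietnam-communes.shp -dialect sqlite -sql "$SQL"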
8 changes: 8 additions & 0 deletions processing/b-reproject.sh
@@ -0,0 +1,8 @@
for ADMIN in communes district province
do
# use admin name to generate output and input file names
INPUT=${1}/tmp/vietnam-${ADMIN}.geojson
OUTPUT=${1}/output/vietnam-${ADMIN}-wgs84.geojson
# reproject to wgs84
ogr2ogr -t_srs EPSG:4326 -f 'GeoJSON' "${OUTPUT}" "${INPUT}"
done
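
A quick way to confirm the reprojection, assuming GDAL's ogrinfo is available; the layer summary should report WGS 84 / EPSG:4326:

ogrinfo -ro -al -so vietnam-district-wgs84.geojson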
10 changes: 10 additions & 0 deletions processing/c-update-geojson-spec.sh
@@ -0,0 +1,10 @@
# enforce right-hand rule for polygons
for ADMIN in communes district province
do
INPUT_FILE=${1}/tmp/vietnam-${ADMIN}-wgs84.geojson
OUTPUT_FILE=${1}/output/vietnam-${ADMIN}-cleaned.geojson
# remove crs object to match current GeoJSON spec
sed -i .org '/\"crs\"/ d; /^$/d' ${INPUT_FILE}
# enforce right-hand-rule winding, also to match the current spec
geojson-rewind ${INPUT_FILE} > ${OUTPUT_FILE}
done
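
Portability note: `sed -i .org` (backup suffix as a separate word) is BSD/macOS syntax; GNU sed expects the suffix attached to the flag. A sketch of the GNU equivalent:

sed -i.org '/"crs"/ d; /^$/d' "${INPUT_FILE}"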
63 changes: 63 additions & 0 deletions processing/d-simplify-props.js
@@ -0,0 +1,63 @@
var createReadStream = require('fs').createReadStream;
var createWriteStream = require('fs').createWriteStream;
var readdirSync = require('fs').readdirSync;
var path = require('path');
var parallel = require('async').parallel;

// the process directory; the pipeline passes it as the first argument
var baseDir = process.argv[2] || 'data/processing/d-simplify-props';

// streams to read and write geojsons; parse/stringify instances are created
// per task below, since a stream cannot be shared across parallel pipelines
var geojsonStream = require('geojson-stream');
// helps split single-line json into chunked-by-line geojson
var split = require('split');
// tmp dir with geojsons
var adminPath = `${baseDir}/tmp`;
var admins = readdirSync(adminPath)

// create list of async functions to pass to parallel
const adminTasks = admins.map((admin) => {
return function(cb) {
var basename = admin.split('-')[1];
var adminFile = path.join(adminPath, admin);
// each task gets its own parser and stringifier instance
var parser = geojsonStream.parse();
var stringifier = geojsonStream.stringify();
var adminFileStream = createReadStream(adminFile)
.pipe(split())
.pipe(parser)
.on('data', (feature) => {
const properties = feature.properties;
feature.properties = makeNewProperties(properties, basename)
})
.pipe(stringifier)
.pipe(createWriteStream(`${baseDir}/output/vietnam-${basename}-simplified.geojson`))
.on('close', () => { cb(null, null) })
}
});

/**
* simplifies input properties to spec needed to make admin postgis tables
*
* @param {object} properties original properties from streaming geojson
* @param {string} admin admin unit name, like 'commune' or 'district'
* @return {object} newProperties simplified properties generated from properties
*/
function makeNewProperties (properties, admin) {
const newProperties = {};
if (/commune/.test(admin)) {
newProperties.en_name = properties.EN_name;
newProperties.id = properties.COMCODE02;
newProperties.p_id = properties.DISTCODE02;
} else if (/district/.test(admin)) {
newProperties.en_name = properties.D_EName;
newProperties.id = properties.DISTCODE02;
newProperties.p_id = properties.PROCODE02;
} else {
newProperties.en_name = properties.P_EName;
newProperties.id = properties.PROCODE02;
}
return newProperties;
}

parallel(adminTasks, (err, res) => {
if (err) throw err;
});
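
For illustration, inspecting one district feature after this script runs (assuming jq is installed; the source field names D_EName, DISTCODE02, and PROCODE02 come from the shapefile):

# the simplified properties should be the only ones left on each feature
jq '.features[0].properties' vietnam-district-simplified.geojson
# => { "en_name": "...", "id": "<DISTCODE02>", "p_id": "<PROCODE02>" }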
22 changes: 22 additions & 0 deletions processing/db/connection.js
@@ -0,0 +1,22 @@
'use strict';
var assert = require('assert');

// set the db url based on the environment
var DEFAULT_ENVIRONMENT = 'development';
var environment = process.env.MACROCOSM_ENV || DEFAULT_ENVIRONMENT;
var connection = process.env.DATABASE_URL || require('./local').connection[environment];

assert.ok(connection, 'Connection is undefined; check DATABASE_URL or local.js');

// connect knex to the current env's db.
var knex = require('knex')({
client: 'pg',
connection: connection,
debug: false,
pool: {
min: 2,
max: 10
}
});

module.exports = knex;
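
A sketch of pointing the scripts at a database via DATABASE_URL; the user, password, and database name here are placeholders:

export DATABASE_URL=postgres://user:password@localhost:5432/openroads_vn
node processing/e-insert-tables.js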
55 changes: 55 additions & 0 deletions processing/e-insert-tables.js
@@ -0,0 +1,55 @@
var createReadStream = require('fs').createReadStream;
var readdirSync = require('fs').readdirSync;
var path = require('path');
var parallel = require('async').parallel;

// the process directory; the pipeline passes it as the first argument
var baseDir = process.argv[2] || 'data/processing/e-insert-tables';
// db is the configured knex instance exported by ./db/connection
var db = require('./db/connection');
var postgis = require('knex-postgis');

// stream to read geojsons; a parse instance is created per task below,
// since a stream cannot be shared across parallel pipelines
var geojsonStream = require('geojson-stream');
// helps split single-line json into chunked-by-line geojson
var split = require('split');
// tmp dir with geojsons
var adminPath = `${baseDir}/tmp`;
var admins = readdirSync(adminPath)

// db is already a configured knex instance, so knex-postgis can wrap it directly
var st = postgis(db);

// create list of async functions to pass to parallel
const adminTasks = admins.map((admin) => {
return function(cb) {
var basename = admin.split('-')[1];
var adminFile = path.join(adminPath, admin);
// each task gets its own parser instance
var parser = geojsonStream.parse();
var adminFileStream = createReadStream(adminFile)
.pipe(split())
.pipe(parser)
.on('data', (feature) => {
insertIntoTable(feature, basename)
})
.on('end', () => { cb(null, null) })
}
});

function insertIntoTable (feature, admin) {
const properties = feature.properties;
const geometry = feature.geometry;
// build the insert statement; note it is rendered to SQL text here, not executed
const statement = db.insert({
type: admin,
id: properties.id,
parent_id: properties.p_id,
geo: st.geomFromGeoJSON(geometry),
name_en: properties.en_name,
name_vn: ''
}).into(`${admin}-table`).toString();

Review comment: Won't all levels of boundaries live in the same admin table? That's how I have it set up, and how we did it for ORMA Philippines. With one table, it's easier to query, and self-joins make it as versatile as multiple tables (e.g., SELECT child.name_en FROM admin_areas AS parent JOIN admin_areas AS child ON parent.id=child.parent_id WHERE parent.id='12345'). Plus, it's then portable to different countries (which would have different names for their admin levels).

@maxgrossman (Contributor, Author), Sep 15, 2017: For sure they can all live in the same table; they need to if that is how you have things set up! I'll make the changes to that script so we're doing the right insert.
}

parallel(adminTasks, (err, res) => {
if (err) throw err;
});
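
A sketch of the single-table layout the review comment suggests, assuming a table named admin_areas and psql access to the same DATABASE_URL used by connection.js:

# list the english names of an admin area's children via a self-join
psql "$DATABASE_URL" -c "SELECT child.name_en FROM admin_areas AS parent JOIN admin_areas AS child ON parent.id = child.parent_id WHERE parent.id = '12345';"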
12 changes: 0 additions & 12 deletions processing/reproject.sh

This file was deleted.
