Skip to content

Commit

Permalink
Merge pull request #17 from OCHA-DAP/clean-duplicate-adm0s
Browse files Browse the repository at this point in the history
Clean duplicate adm0s
  • Loading branch information
hannahker authored Nov 26, 2024
2 parents dacb088 + 7833650 commit b9f7ebb
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 12 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ test_outputs/*
*.sql

data/*


*.egg-info/
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ source venv/bin/activate
```
pip install -r requirements.txt
pip install -r requirements-dev.txt
pip install -e .
```

3. Create a local `.env` file with the following environment variables:
Expand Down
79 changes: 79 additions & 0 deletions helpers/load_polygons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
This is a temporary helper script to load CODAB data from Fieldmaps to
a private Azure Storage Container. This is done to avoid overloading the
Fieldmaps server during large historical runs, and to integrate some basic
data cleaning that needs to be done on select ISO3 datasets.
This script will likely be quickly deprecated, so has not been written to
full production standards.
Usage: Run LOCALLY from root-level project directory `python helpers/load_polygons.py`
"""


import os
import zipfile
from pathlib import Path

import geopandas as gpd
import requests

from src.utils.cloud_utils import get_container_client
from src.utils.iso3_utils import get_metadata, load_shp

# Country metadata, one row per ISO3 — presumably includes the Fieldmaps
# shapefile download link ("o_shp") and the "iso_3" code used below.
# TODO(review): confirm schema against src.utils.iso3_utils.get_metadata.
df = get_metadata()
# TODO: Swap out "dev"/"prod" depending on which container
# you're writing to
container_client = get_container_client("dev", "polygon")
# Local scratch directory where shapefiles are unzipped and re-zipped
# before upload. NOTE(review): nothing creates this directory — it must
# exist before the script runs.
data_dir = Path("data/tmp")


def download_zip(url, timeout=60):
    """Download a zip archive and return its raw bytes.

    Parameters
    ----------
    url : str
        Direct link to a zip archive (a Fieldmaps shapefile download).
    timeout : int, optional
        Seconds to wait for the server before giving up (default 60).
        Without a timeout a stalled Fieldmaps server would hang the
        entire historical run.

    Returns
    -------
    bytes or None
        The response body on HTTP 200, otherwise None (a warning is
        printed so the caller can skip the country and keep going).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors / timeouts are reported the same way as bad
        # status codes instead of crashing the whole loop.
        print(f"Failed to download: {url} ({e})")
        return None
    if response.status_code == 200:
        return response.content
    else:
        print(f"Failed to download: {url}")
        return None


if __name__ == "__main__":
    # TODO: Right now only set up to run locally
    # Ensure the scratch directory exists on a fresh checkout.
    data_dir.mkdir(parents=True, exist_ok=True)

    for _, row in df.iterrows():
        shp_link = row["o_shp"]
        iso3 = row["iso_3"]
        print(f"Processing data for {iso3}...")

        # Specific ISO3s that need to be dissolved at adm0 level.
        # Temporary workaround before it's fixed in Fieldmaps.
        if iso3 in ["NGA", "TCD", "BDI"]:
            # These countries are rebuilt from the extracted shapefiles,
            # so the raw zip download is not needed here.
            outpath = f"{data_dir}/"
            load_shp(shp_link, outpath, iso3)
            adm0 = gpd.read_file(f"{outpath}{iso3}_adm0.shp")
            adm0 = adm0.dissolve()
            adm0.to_file(f"{outpath}{iso3}_adm0.shp")

            # Re-zip the (now cleaned) shapefile components that exist
            # locally, mirroring the Fieldmaps archive layout.
            zip_name = f"{data_dir}/{iso3.lower()}_shp.zip"
            with zipfile.ZipFile(zip_name, "w") as zipf:
                for adm_level in range(3):  # adm0 through adm2
                    base_name = f"{iso3.lower()}_adm{adm_level}"
                    for ext in [".shp", ".dbf", ".prj", ".shx", ".cpg"]:
                        file_path = os.path.join(data_dir, base_name + ext)
                        if os.path.exists(file_path):
                            zipf.write(
                                file_path, os.path.basename(file_path)
                            )

            # Distinct name for the file handle: previously this reused
            # (shadowed) the downloaded-bytes variable.
            with open(zip_name, "rb") as zip_file:
                blob_name = f"{iso3.lower()}_shp.zip"
                container_client.upload_blob(
                    name=blob_name, data=zip_file, overwrite=True
                )
        else:
            # No cleaning needed: forward the Fieldmaps zip unchanged.
            zip_data = download_zip(shp_link)
            if zip_data:
                blob_name = f"{iso3.lower()}_shp.zip"
                container_client.upload_blob(
                    name=blob_name, data=zip_data, overwrite=True
                )
            else:
                print(f"Skipping {iso3} due to download failure")
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[tool.black]
line-length = 79

[tool.isort]
profile = "black"
line_length = 79
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[metadata]
name = src

[options]
packages = src
6 changes: 3 additions & 3 deletions src/config/era5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ start_date: 1981-01-01
end_date: Null
forecast: False
test:
start_date: 2020-01-01
end_date: 2020-02-01
iso3s: ["AFG"]
start_date: 1981-01-01
end_date: Null
iso3s: ["BDI", "NGA", "TCD"]
6 changes: 3 additions & 3 deletions src/config/floodscan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ forecast: False
extra_dims:
- band : str
test:
start_date: 2023-12-01
end_date: 2024-01-31
iso3s: ["ETH"]
start_date: 1981-01-01
end_date: Null
iso3s: ["BDI", "NGA", "TCD"]
coverage: ["DZA", "AGO", "BEN", "BWA", "BFA", "BDI", "CPV", "CMR", "CAF", "TCD", "COM", "COG", "CIV", "CAP", "DJI", "EGY", "GNQ", "ERI", "SWZ", "ETH", "GAB", "GMB", "GHA", "GIN", "GNB", "KEN", "LS0", "LBR", "LBY", "MDG", "MWI", "MLI", "MRT", "MUS", "MAR", "MOZ", "NAM", "NER", "NGA", "RWA", "STP", "SEN", "SYC", "SLE", "SOM", "ZAF", "SSD", "SDN", "TGO", "TUN", "UGA", "TZA", "ZMB", "ZWE"]
6 changes: 3 additions & 3 deletions src/config/imerg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ start_date: 2000-06-01
end_date: Null
forecast: False
test:
start_date: 2020-01-01
end_date: 2020-01-15
iso3s: ["ETH"]
start_date: 1981-01-01
end_date: Null
iso3s: ["BDI", "NGA", "TCD"]
6 changes: 3 additions & 3 deletions src/config/seas5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ forecast: True
extra_dims:
- leadtime : int
test:
start_date: 2024-01-01
end_date: 2024-02-01
iso3s: ["AFG"]
start_date: 1981-01-01
end_date: Null
iso3s: ["BDI", "NGA", "TCD"]

0 comments on commit b9f7ebb

Please sign in to comment.