Skip to content

Commit

Permalink
More initial commits
Browse files: browse the repository at this point in the history
  • Loading branch information
b-j-mills committed Dec 10, 2024
1 parent 2f36430 commit 7809594
Show file tree
Hide file tree
Showing 5 changed files with 4,044 additions and 25 deletions.
14 changes: 9 additions & 5 deletions src/hdx/scraper/gdacs/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
)
from hdx.utilities.retriever import Retrieve

from src.hdx.scraper.gdacs.gdacs import GDACS

logger = logging.getLogger(__name__)

_USER_AGENT_LOOKUP = "hdx-scraper-gdacs"
_SAVED_DATA_DIR = "saved_data" # Keep in repo to avoid deletion in /tmp
_UPDATED_BY_SCRIPT = "HDX Scraper: gdacs"
_UPDATED_BY_SCRIPT = "HDX Scraper: GDACS"


def main(
Expand Down Expand Up @@ -48,9 +50,10 @@ def main(
use_saved=use_saved,
)
configuration = Configuration.read()
#
# Steps to generate dataset
#
gdacs = GDACS(configuration, retriever)
gdacs.get_data()

dataset = gdacs.generate_dataset()
dataset.update_from_yaml(
path=join(
dirname(__file__), "config", "hdx_dataset_static.yaml"
Expand All @@ -72,5 +75,6 @@ def main(
user_agent_config_yaml=join(expanduser("~"), ".useragents.yaml"),
user_agent_lookup=_USER_AGENT_LOOKUP,
project_config_yaml=join(
dirname(__file__), "config", "project_configuration.yaml"),
dirname(__file__), "config", "project_configuration.yaml"
),
)
2 changes: 2 additions & 0 deletions src/hdx/scraper/gdacs/config/project_configuration.yaml
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# Collector-specific configuration

base_url: "https://www.gdacs.org/XML/RSS.xml"
24 changes: 13 additions & 11 deletions src/hdx/scraper/gdacs/gdacs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
"""gdacs scraper"""

import logging
from typing import List, Optional

from typing import Optional

from feedparser import parse
from hdx.api.configuration import Configuration
from hdx.data.dataset import Dataset
from hdx.data.hdxobject import HDXError
Expand All @@ -13,18 +13,18 @@
logger = logging.getLogger(__name__)


class gdacs:

def __init__(
self, configuration: Configuration, retriever: Retrieve, temp_dir: str
):
class GDACS:
def __init__(self, configuration: Configuration, retriever: Retrieve):
self._configuration = configuration
self._retriever = retriever
self._temp_dir = temp_dir
self.data = {}

def get_data(self) -> None:
text = self._retriever.download_text(self._configuration["base_url"])
feed = parse(text)
return

def generate_dataset(self) -> Optional[Dataset]:

# To be generated
dataset_name = None
dataset_title = None
Expand All @@ -41,13 +41,15 @@ def generate_dataset(self) -> Optional[Dataset]:
)

dataset.set_time_period(dataset_time_period)
dataset.add_tagsa(dataset_tags)
dataset.add_tags(dataset_tags)
# Only if needed
dataset.set_subnational(True)
try:
dataset.add_country_location(dataset_country_iso3)
except HDXError:
logger.error(f"Couldn't find country {dataset_country_iso3}, skipping")
logger.error(
f"Couldn't find country {dataset_country_iso3}, skipping"
)
return

# Add resources here
Expand Down
Loading

0 comments on commit 7809594

Please sign in to comment.