Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: handle prod env #18

Merged
merged 4 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion app.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
{
"cron": [
{
"command": "python cli.py load",
"command": "python cli.py load --env demo",
"schedule": "42 4 * * *"
},
{
"command": "python cli.py load --env prod",
"schedule": "42 3 * * *"
abulte marked this conversation as resolved.
Show resolved Hide resolved
}
],
"healthchecks": {
Expand Down
59 changes: 27 additions & 32 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,12 @@
from minicli import cli, run
from sqlalchemy.types import Float

from config import get_config_value
from db import get_table, get_tables, query
from metrics import compute_quality_score
from models import Bouquet, Dataset, DatasetBouquet, Organization, Rel, Resource


def get_prefix_from_env(env: str):
return "www" if env == "prod" else env


def iter_rel(rel: Rel, quiet: bool = False):
current_url = rel["href"]
if not quiet:
Expand All @@ -42,10 +39,10 @@ def iter_rel(rel: Rel, quiet: bool = False):

@cli
def load_organizations(env: str = "demo", refresh: bool = False):
prefix = get_prefix_from_env(env)
prefix = get_config_value(env, "prefix")
url = f"https://{prefix}.data.gouv.fr/api/1/organizations"
catalog = get_table("catalog")
organizations = get_table("organizations")
catalog = get_table(env, "catalog")
organizations = get_table(env, "organizations")
org_ids = set(
[
d["organization"]
Expand All @@ -70,22 +67,20 @@ def load_organizations(env: str = "demo", refresh: bool = False):


@cli
def load_bouquets(
env: str = "demo", universe_name: str = "ecospheres", include_private: bool = False
):
prefix = get_prefix_from_env(env)

catalog = get_table("catalog")
def load_bouquets(env: str = "demo", include_private: bool = False):
prefix = get_config_value(env, "prefix")
catalog = get_table(env, "catalog")

datasets_bouquets = get_table("datasets_bouquets")
datasets_bouquets = get_table(env, "datasets_bouquets")
if datasets_bouquets.exists:
datasets_bouquets.drop()

bouquets = get_table("bouquets")
bouquets = get_table(env, "bouquets")
if bouquets.exists:
# pre-set deleted, will be overwritten by actual upsert
query("UPDATE bouquets SET deleted = TRUE")
query(env, "UPDATE bouquets SET deleted = TRUE")

universe_name = get_config_value(env, "universe_name")
url = f"https://{prefix}.data.gouv.fr/api/2/topics/?tag={universe_name}"
if include_private:
url = f"{url}&include_private=yes"
Expand All @@ -102,7 +97,6 @@ def load_bouquets(
@cli
def load(
env: str = "demo",
topic_slug: str = "univers-ecospheres",
skip_related: bool = False,
skip_metrics: bool = False,
):
Expand All @@ -115,7 +109,8 @@ def load(

And compute associated metrics.
"""
prefix = get_prefix_from_env(env)
prefix = get_config_value(env, "prefix")
topic_slug = get_config_value(env, "topic_slug")
request_topic = requests.get(f"https://{prefix}.data.gouv.fr/api/2/topics/{topic_slug}/")
request_topic.raise_for_status()
topic = request_topic.json()
Expand All @@ -124,13 +119,13 @@ def load(
request_licenses.raise_for_status()
licenses = request_licenses.json()

table = get_table("catalog")
table = get_table(env, "catalog")
if table.exists:
# pre-set deleted, will be overwritten by actual upsert
query("UPDATE catalog SET deleted = TRUE")
query(env, "UPDATE catalog SET deleted = TRUE")

resources_table = get_table("resources")
if "resources" in get_tables() and not skip_related:
resources_table = get_table(env, "resources")
if "resources" in get_tables(env) and not skip_related:
resources_table.drop()

for d in iter_rel(topic["datasets"]):
Expand All @@ -142,20 +137,20 @@ def load(
resources_table.upsert(Resource.from_payload(d["id"], r), ["resource_id"])

if not skip_related:
load_organizations()
load_bouquets(include_private=True)
load_organizations(env=env)
load_bouquets(env=env, include_private=True)

if not skip_metrics:
compute_metrics()
compute_metrics(env=env)


@cli
def compute_metrics():
def compute_metrics(env: str = "demo"):
"""
Fill the time-series metrics table with today's data
"""
catalog = get_table("catalog")
metrics = get_table("metrics")
catalog = get_table(env, "catalog")
metrics = get_table(env, "metrics")
at = date.today()

def add_metric(
Expand Down Expand Up @@ -185,7 +180,7 @@ def add_metric(
agg["nb_datasets"] += nb_datasets

# average quality score per organization
add_metric("avg_quality__score", compute_quality_score(org), organization=org)
add_metric("avg_quality__score", compute_quality_score(env, org), organization=org)

for indicator in Dataset.indicators:
field = indicator["field"]
Expand All @@ -203,13 +198,13 @@ def add_metric(
add_metric(agg_key, agg_value)

# global average quality score
add_metric("avg_quality__score", compute_quality_score())
add_metric("avg_quality__score", compute_quality_score(env))

datasets_bouquets = get_table("datasets_bouquets")
datasets_bouquets = get_table(env, "datasets_bouquets")
# nb of associations bouquet <-> dataset from universe
add_metric("nb_datasets_from_universe_in_bouquets", datasets_bouquets.count())

bouquets = get_table("bouquets")
bouquets = get_table(env, "bouquets")
add_metric("nb_bouquets", bouquets.count(deleted=False))
add_metric("nb_bouquets_public", bouquets.count(private=False))
add_metric(
Expand Down
26 changes: 26 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
from typing import Literal

# Names of the settings every environment must define.
ConfigKeys = Literal["universe_name", "topic_slug", "prefix", "dsn"]


# Settings for each supported environment, keyed by environment name.
# "dsn" is read from the process environment once, when this module is
# imported, and falls back to an empty string when the variable is unset.
ENVS_CONF: dict[Literal["prod", "demo"], dict[ConfigKeys, str]] = {
    "prod": {
        "universe_name": "univers-ecospheres",
        "topic_slug": "univers-ecospheres",
        "prefix": "www",
        "dsn": os.getenv("DATABASE_URL_PROD", ""),
    },
    "demo": {
        "universe_name": "ecospheres",
        "topic_slug": "univers-ecospheres",
        "prefix": "demo",
        "dsn": os.getenv("DATABASE_URL", ""),
    },
}


def get_config_value(env: str, key: ConfigKeys) -> str:
    """Return the setting ``key`` configured for environment ``env``.

    Args:
        env: Environment name; must be a key of ``ENVS_CONF``.
        key: Name of the setting to read.

    Returns:
        The configured value (an empty string for an unset ``dsn``).

    Raises:
        ValueError: If ``env`` is not a known environment, or ``key`` is not
            a setting defined for that environment.
    """
    if env not in ENVS_CONF:
        raise ValueError(f"Invalid environment '{env}'.")

    conf = ENVS_CONF[env]
    # Report an unknown key the same way as an unknown environment instead of
    # letting a bare KeyError escape.
    if key not in conf:
        raise ValueError(f"Invalid configuration key '{key}' for environment '{env}'.")
    return conf[key]
32 changes: 15 additions & 17 deletions db.py
abulte marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,42 +1,40 @@
import os

import dataset
import dataset.table
from dataset.util import ResultIter

_db = None
from config import get_config_value

_dbs = {}

def get(env: str) -> dataset.Database:
    """Return the database connection for ``env``, opening it on first use.

    Args:
        env: Environment name whose ``dsn`` setting identifies the database.

    Returns:
        The connection already registered for ``env`` if there is one,
        otherwise a newly opened connection.

    Raises:
        ValueError: If no dsn is configured for ``env`` (``get_config_value``
            itself raises ``ValueError`` for an unknown environment).
    """
    db = get_db(env)
    if db:
        return db

    dsn = get_config_value(env, "dsn")
    if not dsn:
        raise ValueError(f"Required database dsn env var missing for environment '{env}'.")

    # Register the connection so later calls for the same environment reuse
    # it instead of connecting again on every table lookup or query.
    db = dataset.connect(dsn)
    _dbs[env] = db
    return db


def get_db(env: str) -> dataset.Database | None:
    """Return the connection registered for ``env``, or ``None`` if there is none."""
    return _dbs.get(env)


def get_table(env: str, table_name: str) -> dataset.table.Table:
    """Return the table ``table_name`` from the database of environment ``env``.

    Raises:
        ValueError: If the database returns ``None`` for ``table_name``.
    """
    table = get(env).get_table(table_name)

    if table is None:
        raise ValueError(f"Table '{table_name}' does not exist.")

    return table


def get_tables(env: str) -> list[str]:
    """Return the table names reported by the database of environment ``env``."""
    return get(env).tables


def query(env: str, q: str, *args, **kwargs) -> ResultIter:
    """Run the statement ``q`` against the database of environment ``env``.

    Extra positional and keyword arguments are forwarded unchanged to the
    connection's ``query`` method.
    """
    return get(env).query(q, *args, **kwargs)
4 changes: 2 additions & 2 deletions metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def quality_score_query(organization: str | None = None) -> tuple[str, dict]:
return q, kwargs


def compute_quality_score(env: str, organization: str | None = None) -> float | None:
    """Return the mean quality score stored in the database of ``env``.

    Args:
        env: Environment whose database is queried.
        organization: Passed to ``quality_score_query`` to build the
            statement; ``None`` is the default.

    Returns:
        The ``mean_score`` field of the first result row, or ``None`` when
        the query yields no row (or a falsy row).
    """
    q, kwargs = quality_score_query(organization)
    # Default to None so an empty result takes the "no score" path below
    # instead of escaping as StopIteration.
    first_row = next(query(env, q, **kwargs), None)
    return first_row["mean_score"] if first_row else None
13 changes: 13 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest

from config import get_config_value


def test_wrong_env():
    """An unknown environment name is rejected with ``ValueError``."""
    pytest.raises(ValueError, get_config_value, "wrong_env", "universe_name")


def test_get_simple_value():
    """Each environment exposes its own ``universe_name`` value."""
    expected_by_env = {
        "demo": "ecospheres",
        "prod": "univers-ecospheres",
    }
    for env_name, universe in expected_by_env.items():
        assert get_config_value(env_name, "universe_name") == universe
11 changes: 5 additions & 6 deletions tests/test_db.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from unittest.mock import Mock, patch

import pytest
Expand All @@ -8,13 +7,13 @@

@patch("db.get_db", Mock(return_value=True))
def test_get_return_db_if_it_truthy():
    """``get`` returns the truthy object that ``get_db`` already provides."""
    assert get("test") is True


# Isolate the module-level connection registry so this test neither depends on
# nor leaks registered connections.
@patch.dict("db._dbs", clear=True)
@patch("dataset.connect", Mock(return_value={"database": True}))
@patch("db.get_config_value", Mock(return_value="test"))
def test_get_return_fresh_db():
    """With nothing registered, ``get`` returns what ``dataset.connect`` gives."""
    assert get("test") == {"database": True}


def test_get_table_raise_if_received_none():
Expand All @@ -24,7 +23,7 @@ def get_table(self, table_name: str):

with patch("db.get", return_value=Mock()):
with pytest.raises(ValueError):
get_table("table_name")
get_table("test", "table_name")


def test_get_table_return():
Expand All @@ -33,4 +32,4 @@ def get_table(self, table_name: str):
return {"table": True}

with patch("db.get", return_value=Mock()):
assert get_table("table_name") == {"table": True}
assert get_table("test", "table_name") == {"table": True}
5 changes: 4 additions & 1 deletion tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@

def test_compute_quality_score():
    """The ``mean_score`` of the first row returned by the query is reported."""

    class MockDb:
        # Stands in for ``db.get``: called with the env, then queried.
        def __init__(self, env: str) -> None:
            pass

        def query(self, *args, **kwargs):
            yield {"mean_score": 0.5}

    with patch("db.get", MockDb):
        assert compute_quality_score("test") == 0.5


def test_quality_score_query():
Expand Down
Loading