Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For xpro, conditionally ingest topics for each resource depending on formatting #1611

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions learning_resources/etl/loaders_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
LearningResourcePlatformFactory,
LearningResourceRunFactory,
LearningResourceTopicFactory,
LearningResourceTopicMappingFactory,
PodcastEpisodeFactory,
PodcastFactory,
ProgramFactory,
Expand Down Expand Up @@ -718,6 +719,49 @@ def test_load_topics(mocker, parent_factory, topics_exist):
assert parent.learning_resource.topics.count() == 0


@pytest.mark.parametrize(
    ("raw_topics", "expected_topics"),
    [
        # One "Category:Name" entry alongside one plain entry
        (["Technology:AI/Machine Learning", "Management"], ["Management"]),
        # Only "Category:Name" entries
        (["Technology:AI/Machine Learning", "Business:Management"], []),
        # Only plain entries
        (["Machine Learning", "Management"], ["Machine Learning", "Management"]),
        (["AI", "Machine Learning"], ["AI", "Machine Learning"]),
        # Plain entries followed by a "Category:Name" entry
        (
            ["AI", "Machine Learning", "Technology:AI/Machine Learning"],
            ["AI", "Machine Learning"],
        ),
    ],
)
def test_load_mixed_topics_data(raw_topics, expected_topics):
    """
    Test that load_topics assigns the expected topics for mixed topic data

    Every parametrized case expects the colon-separated "Category:Name"
    entries to contribute no topics, while the plain names end up on the
    resource.
    """
    resource = LearningResourceFactory.create(is_course=True, topics=[])
    offeror = LearningResourceOfferorFactory.create(is_xpro=True)

    # (name of the topic to create, offeror topic name mapped onto it)
    topic_mappings = (
        ("AI", "AI/Machine Learning"),
        ("Machine Learning", "AI/Machine Learning"),
        ("Management", "Management"),
    )
    for mapped_topic_name, offeror_topic_name in topic_mappings:
        LearningResourceTopicMappingFactory.create(
            offeror=offeror,
            topic=LearningResourceTopicFactory.create(name=mapped_topic_name),
            topic_name=offeror_topic_name,
        )

    topics_data = [{"name": raw_name} for raw_name in raw_topics]
    load_topics(resource, topics_data)

    # Compare order-insensitively
    loaded_names = sorted(topic.name for topic in resource.topics.all())
    assert loaded_names == sorted(expected_topics)


@pytest.mark.parametrize("instructor_exists", [True, False])
def test_load_instructors(instructor_exists):
"""Test that load_instructors creates and/or assigns instructors to the course run"""
Expand Down
26 changes: 2 additions & 24 deletions learning_resources/etl/xpro.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from learning_resources.etl.utils import (
generate_course_numbers_json,
transform_delivery,
transform_topics,
)
from main.utils import clean_data

Expand Down Expand Up @@ -52,27 +51,6 @@ def _parse_datetime(value):
return parse(value).replace(tzinfo=UTC) if value else None


def parse_topics(resource_data: dict) -> list[dict]:
    """
    Build the list of {"name": <topic>} dicts for a course or program

    Each non-empty raw topic keeps only the text after the last ":" in its
    name, stripped of surrounding whitespace. The resulting dicts are then
    handed to transform_topics together with the xPro offeror name.

    Args:
        resource_data: course or program data; must contain a "topics" key

    Returns:
        list of dict: an empty list when "topics" is empty or None,
            otherwise whatever transform_topics returns
    """
    raw_topics = resource_data["topics"]
    if not raw_topics:
        return []

    cleaned_topics = []
    for raw_topic in raw_topics:
        # Falsy entries (e.g. None or {}) are skipped
        if not raw_topic:
            continue
        short_name = raw_topic["name"].split(":")[-1].strip()
        cleaned_topics.append({"name": short_name})

    return transform_topics(cleaned_topics, OfferedBy.xpro.name)


def extract_programs():
"""Loads the xPro catalog data""" # noqa: D401
if settings.XPRO_CATALOG_API_URL:
Expand Down Expand Up @@ -147,7 +125,7 @@ def _transform_learning_resource_course(course):
"published": any(
course_run.get("current_price", None) for course_run in course["courseruns"]
),
"topics": parse_topics(course),
"topics": course["topics"],
"runs": [
_transform_run(course_run, course) for course_run in course["courseruns"]
],
Expand Down Expand Up @@ -197,7 +175,7 @@ def transform_programs(programs):
program["current_price"]
), # a program is only considered published if it has a product/price
"url": program["url"],
"topics": parse_topics(program),
"topics": program["topics"],
"platform": XPRO_PLATFORM_TRANSFORM.get(program["platform"], None),
"resource_type": LearningResourceType.program.name,
"delivery": transform_delivery(program.get("format")),
Expand Down
41 changes: 4 additions & 37 deletions learning_resources/etl/xpro_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,7 @@
from learning_resources.etl.utils import (
transform_delivery,
)
from learning_resources.etl.xpro import _parse_datetime, parse_topics
from learning_resources.factories import (
LearningResourceOfferorFactory,
LearningResourceTopicFactory,
LearningResourceTopicMappingFactory,
)
from learning_resources.etl.xpro import _parse_datetime
from learning_resources.test_utils import set_up_topics
from main.test_utils import any_instance_of

Expand Down Expand Up @@ -109,7 +104,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
"published": bool(program_data["current_price"]),
"url": program_data["url"],
"availability": Availability.dated.name,
"topics": parse_topics(program_data),
"topics": program_data["topics"],
"platform": PlatformType.xpro.name,
"resource_type": LearningResourceType.program.name,
"delivery": transform_delivery(program_data.get("format")),
Expand Down Expand Up @@ -156,7 +151,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
for course_run in course_data["courseruns"]
),
"availability": Availability.dated.name,
"topics": parse_topics(course_data),
"topics": course_data["topics"],
"resource_type": LearningResourceType.course.name,
"continuing_ed_credits": course_data.get("credits"),
"pace": [Pace.self_paced.name],
Expand Down Expand Up @@ -233,7 +228,7 @@ def test_xpro_transform_courses(mock_xpro_courses_data):
for course_run in course_data["courseruns"]
),
"availability": Availability.dated.name,
"topics": parse_topics(course_data),
"topics": course_data["topics"],
"resource_type": LearningResourceType.course.name,
"runs": [
{
Expand Down Expand Up @@ -324,31 +319,3 @@ def test_program_run_start_date_value(
assert transformed_programs[0]["runs"][0]["start_date"] == _parse_datetime(
expected_dt
)


def test_parse_topics_data():
    """
    Test that parse_topics returns the mapped topics for xpro topic names

    "AI/Machine Learning" is mapped to two topics and "Management" to one,
    so three topic dicts are expected in total.
    """
    offeror = LearningResourceOfferorFactory.create(is_xpro=True)

    # (name of the topic to create, xpro topic name mapped onto it)
    topic_mappings = (
        ("AI", "AI/Machine Learning"),
        ("Machine Learning", "AI/Machine Learning"),
        ("Management", "Management"),
    )
    for mapped_topic_name, xpro_topic_name in topic_mappings:
        LearningResourceTopicMappingFactory.create(
            offeror=offeror,
            topic=LearningResourceTopicFactory.create(name=mapped_topic_name),
            topic_name=xpro_topic_name,
        )

    course_data = {
        "topics": [{"name": "AI/Machine Learning"}, {"name": "Management"}],
    }
    expected_topics = [
        {"name": "AI"},
        {"name": "Machine Learning"},
        {"name": "Management"},
    ]

    # Sort by name so the comparison does not depend on result ordering
    parsed_topics = sorted(
        parse_topics(course_data), key=lambda topic: topic["name"]
    )
    assert parsed_topics == expected_topics
7 changes: 5 additions & 2 deletions test_json/xpro_courses.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
"courseruns": [],
"next_run_id": null,
"platform": "xPRO",
"topics": [{ "name": "Business:Leadership & Organizations" }],
"topics": [
{ "name": "Organizations & Leadership" },
{ "name": "Business:Leadership & Organizations" }
],
"format": "Online",
"availability": "dated",
"credits": "1.25"
Expand Down Expand Up @@ -38,7 +41,7 @@
}
],
"next_run_id": 49,
"topics": [{ "name": "Business:Leadership & Organizations" }],
"topics": [{ "name": "Organizations & Leadership" }],
"format": "In person",
"availability": "dated",
"credits": "2.25"
Expand Down
Loading