mitodl · mbertrand · Sep 26, 2024 · Oct 1, 2024
diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py
@@ -60,6 +60,7 @@
     LearningResourcePlatformFactory,
     LearningResourceRunFactory,
     LearningResourceTopicFactory,
+    LearningResourceTopicMappingFactory,
     PodcastEpisodeFactory,
     PodcastFactory,
     ProgramFactory,
@@ -718,6 +719,49 @@ def test_load_topics(mocker, parent_factory, topics_exist):
     assert parent.learning_resource.topics.count() == 0
 
 
+@pytest.mark.parametrize(
+    ("raw_topics", "expected_topics"),
+    [
+        (["Technology:AI/Machine Learning", "Management"], ["Management"]),
+        (
+            ["Technology:AI/Machine Learning", "Business:Management"],
+            [],  # neither "Prefix:Name" entry is expected to be assigned
+        ),
+        (["Machine Learning", "Management"], ["Machine Learning", "Management"]),
+        (["AI", "Machine Learning"], ["AI", "Machine Learning"]),
+        (
+            ["AI", "Machine Learning", "Technology:AI/Machine Learning"],
+            ["AI", "Machine Learning"],  # the prefixed entry adds nothing extra
+        ),
+    ],
+)
+def test_load_mixed_topics_data(raw_topics, expected_topics):
+    """Test that load_topics keeps plain topic names and drops "Prefix:Name" ones"""
+    resource = LearningResourceFactory.create(is_course=True, topics=[])
+    offeror = LearningResourceOfferorFactory.create(is_xpro=True)
+    LearningResourceTopicMappingFactory.create(
+        offeror=offeror,
+        topic=LearningResourceTopicFactory.create(name="AI"),
+        topic_name="AI/Machine Learning",
+    )
+    LearningResourceTopicMappingFactory.create(
+        offeror=offeror,
+        topic=LearningResourceTopicFactory.create(name="Machine Learning"),
+        topic_name="AI/Machine Learning",
+    )
+    LearningResourceTopicMappingFactory.create(
+        offeror=offeror,
+        topic=LearningResourceTopicFactory.create(name="Management"),
+        topic_name="Management",
+    )
+
+    load_topics(resource, [{"name": topic} for topic in raw_topics])
+
+    assert sorted([topic.name for topic in resource.topics.all()]) == sorted(
+        expected_topics
+    )
+
+
 @pytest.mark.parametrize("instructor_exists", [True, False])
 def test_load_instructors(instructor_exists):
     """Test that load_instructors creates and/or assigns instructors to the course run"""

diff --git a/learning_resources/etl/xpro.py b/learning_resources/etl/xpro.py
@@ -20,7 +20,6 @@
 from learning_resources.etl.utils import (
     generate_course_numbers_json,
     transform_delivery,
-    transform_topics,
 )
 from main.utils import clean_data
 
@@ -52,27 +51,6 @@ def _parse_datetime(value):
     return parse(value).replace(tzinfo=UTC) if value else None
 
 
-def parse_topics(resource_data: dict) -> list[dict]:
-    """
-    Get a list containing {"name": <topic>} dict objects
-    Args:
-        resource_data: course or program data
-    Returns:
-        list of dict: list containing topic dicts with a name attribute
-    """
-    extracted_topics = resource_data["topics"]
-    if not extracted_topics:
-        return []
-    return transform_topics(
-        [
-            {"name": topic["name"].split(":")[-1].strip()}
-            for topic in extracted_topics
-            if topic
-        ],
-        OfferedBy.xpro.name,
-    )
-
-
 def extract_programs():
     """Loads the xPro catalog data"""  # noqa: D401
     if settings.XPRO_CATALOG_API_URL:
@@ -147,7 +125,7 @@ def _transform_learning_resource_course(course):
         "published": any(
             course_run.get("current_price", None) for course_run in course["courseruns"]
         ),
-        "topics": parse_topics(course),
+        "topics": course["topics"],
         "runs": [
             _transform_run(course_run, course) for course_run in course["courseruns"]
         ],
@@ -197,7 +175,7 @@ def transform_programs(programs):
                 program["current_price"]
             ),  # a program is only considered published if it has a product/price
             "url": program["url"],
-            "topics": parse_topics(program),
+            "topics": program["topics"],
             "platform": XPRO_PLATFORM_TRANSFORM.get(program["platform"], None),
             "resource_type": LearningResourceType.program.name,
             "delivery": transform_delivery(program.get("format")),

diff --git a/learning_resources/etl/xpro_test.py b/learning_resources/etl/xpro_test.py
@@ -20,12 +20,7 @@
 from learning_resources.etl.utils import (
     transform_delivery,
 )
-from learning_resources.etl.xpro import _parse_datetime, parse_topics
-from learning_resources.factories import (
-    LearningResourceOfferorFactory,
-    LearningResourceTopicFactory,
-    LearningResourceTopicMappingFactory,
-)
+from learning_resources.etl.xpro import _parse_datetime
 from learning_resources.test_utils import set_up_topics
 from main.test_utils import any_instance_of
 
@@ -109,7 +104,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
             "published": bool(program_data["current_price"]),
             "url": program_data["url"],
             "availability": Availability.dated.name,
-            "topics": parse_topics(program_data),
+            "topics": program_data["topics"],
             "platform": PlatformType.xpro.name,
             "resource_type": LearningResourceType.program.name,
             "delivery": transform_delivery(program_data.get("format")),
@@ -156,7 +151,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data):
                         for course_run in course_data["courseruns"]
                     ),
                     "availability": Availability.dated.name,
-                    "topics": parse_topics(course_data),
+                    "topics": course_data["topics"],
                     "resource_type": LearningResourceType.course.name,
                     "continuing_ed_credits": course_data.get("credits"),
                     "pace": [Pace.self_paced.name],
@@ -233,7 +228,7 @@ def test_xpro_transform_courses(mock_xpro_courses_data):
                 for course_run in course_data["courseruns"]
             ),
             "availability": Availability.dated.name,
-            "topics": parse_topics(course_data),
+            "topics": course_data["topics"],
             "resource_type": LearningResourceType.course.name,
             "runs": [
                 {
@@ -324,31 +319,3 @@ def test_program_run_start_date_value(
     assert transformed_programs[0]["runs"][0]["start_date"] == _parse_datetime(
         expected_dt
     )
-
-
-def test_parse_topics_data():
-    """Test that topics are correctly parsed from the xpro data"""
-    offeror = LearningResourceOfferorFactory.create(is_xpro=True)
-    LearningResourceTopicMappingFactory.create(
-        offeror=offeror,
-        topic=LearningResourceTopicFactory.create(name="AI"),
-        topic_name="AI/Machine Learning",
-    )
-    LearningResourceTopicMappingFactory.create(
-        offeror=offeror,
-        topic=LearningResourceTopicFactory.create(name="Machine Learning"),
-        topic_name="AI/Machine Learning",
-    )
-    LearningResourceTopicMappingFactory.create(
-        offeror=offeror,
-        topic=LearningResourceTopicFactory.create(name="Management"),
-        topic_name="Management",
-    )
-    course_data = {
-        "topics": [{"name": "AI/Machine Learning"}, {"name": "Management"}],
-    }
-    assert sorted(parse_topics(course_data), key=lambda topic: topic["name"]) == [
-        {"name": "AI"},
-        {"name": "Machine Learning"},
-        {"name": "Management"},
-    ]
diff --git a/test_json/xpro_courses.json b/test_json/xpro_courses.json
@@ -9,7 +9,10 @@
     "courseruns": [],
     "next_run_id": null,
     "platform": "xPRO",
-    "topics": [{ "name": "Business:Leadership & Organizations" }],
+    "topics": [
+      { "name": "Organizations & Leadership" },
+      { "name": "Business:Leadership & Organizations" }
+    ],
     "format": "Online",
     "availability": "dated",
     "credits": "1.25"
@@ -38,7 +41,7 @@
       }
     ],
     "next_run_id": 49,
-    "topics": [{ "name": "Business:Leadership & Organizations" }],
+    "topics": [{ "name": "Organizations & Leadership" }],
     "format": "In person",
     "availability": "dated",
     "credits": "2.25"