-
Notifications
You must be signed in to change notification settings - Fork 163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Run migrations to fix incorrect source fields of contentnodes #4720
Changes from 7 commits
c9231fe
a8adee4
1d867c5
5641e14
c02201b
71ce33b
37b3181
9ad8ce7
a1aaedd
0f1518f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS | ||
import datetime | ||
import uuid | ||
|
||
from django.core.management import call_command | ||
from django.utils import timezone | ||
from le_utils.constants import content_kinds | ||
|
||
from contentcuration.models import Channel | ||
from contentcuration.models import ContentNode | ||
from contentcuration.models import License | ||
from contentcuration.tests import testdata | ||
from contentcuration.tests.base import StudioAPITestCase | ||
from contentcuration.utils.publish import publish_channel | ||
|
||
|
||
class TestRectifyMigrationCommand(StudioAPITestCase): | ||
|
||
@classmethod | ||
def setUpClass(cls): | ||
super(TestRectifyMigrationCommand, cls).setUpClass() | ||
|
||
def tearDown(self): | ||
super(TestRectifyMigrationCommand, self).tearDown() | ||
|
||
def setUp(self): | ||
super(TestRectifyMigrationCommand, self).setUp() | ||
self.original_channel = testdata.channel() | ||
self.license_original = License.objects.all()[0] | ||
self.original_contentnode = ContentNode.objects.create( | ||
id=uuid.uuid4().hex, | ||
title="Original Node", | ||
parent=self.original_channel.main_tree, | ||
license=self.license_original, | ||
original_channel_id=None, | ||
source_channel_id=None, | ||
author="old author" | ||
) | ||
self.user = testdata.user() | ||
self.original_channel.editors.add(self.user) | ||
self.client.force_authenticate(user=self.user) | ||
|
||
def create_base_channel_and_contentnode(self, source_contentnode, source_channel): | ||
base_channel = testdata.channel() | ||
base_channel.public = True | ||
base_channel.save() | ||
license_changed = License.objects.all()[2] | ||
base_node = ContentNode.objects.create( | ||
id=uuid.uuid4().hex, | ||
title="base contentnode", | ||
parent=base_channel.main_tree, | ||
kind_id=content_kinds.VIDEO, | ||
original_channel_id=self.original_channel.id, | ||
original_source_node_id=self.original_contentnode.node_id, | ||
source_channel_id=source_channel.id, | ||
source_node_id=source_contentnode.node_id, | ||
author="source author", | ||
license=license_changed, | ||
) | ||
return base_node, base_channel | ||
|
||
def create_source_channel_and_contentnode(self): | ||
source_channel = testdata.channel() | ||
source_channel.public = True | ||
source_channel.save() | ||
license_changed = License.objects.all()[1] | ||
source_node = ContentNode.objects.create( | ||
id=uuid.uuid4().hex, | ||
title="base contentnode", | ||
parent=source_channel.main_tree, | ||
kind_id=content_kinds.VIDEO, | ||
license=license_changed, | ||
original_channel_id=self.original_channel.id, | ||
source_channel_id=self.original_channel.id, | ||
source_node_id=self.original_contentnode.node_id, | ||
original_source_node_id=self.original_contentnode.node_id, | ||
author="source author", | ||
) | ||
|
||
return source_node, source_channel | ||
|
||
def run_migrations(self): | ||
call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True) | ||
|
||
def test_two_node_case(self): | ||
base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel) | ||
|
||
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) | ||
|
||
# main_tree node still has changed=true even after the publish | ||
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): | ||
print(node.id) | ||
print(node.id == base_channel.main_tree.id) | ||
print("checking if the node is complete or not ", node.complete) | ||
node.changed = False | ||
# This should probably again change the changed=true but suprisingly it doesnot | ||
# Meaning the changed boolean doesnot change for the main_tree no matter what we do | ||
# through ContentNode model methods like save. | ||
node.save() | ||
|
||
ContentNode.objects.filter(pk=base_node.pk).update( | ||
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One suggestion would be to use the same copy/import utilities that we use elsewhere, then override the things that shouldn't have changed, but it isn't a big deal. From my perspective, I like to do my best to ensure the tests are founded upon the app's behaviors as much as possible, because too many differences could cause the tests to pass when they shouldn't (under the typical behaviors of the app) |
||
|
||
self.run_migrations() | ||
updated_base_node = ContentNode.objects.get(pk=base_node.pk) | ||
self.assertEqual(updated_base_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_base_node.author, self.original_contentnode.author) | ||
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) | ||
|
||
def test_three_node_case_implicit(self): | ||
source_node, source_channel = self.create_source_channel_and_contentnode() | ||
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) | ||
source_node.aggregator = "Nami" | ||
source_node.save() | ||
# Implicit case | ||
base_node.author = source_node.author | ||
base_node.license = source_node.license | ||
base_node.aggregator = source_node.aggregator | ||
base_node.save() | ||
|
||
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) | ||
|
||
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): | ||
print(node.id) | ||
print(node.id == base_channel.main_tree.id) | ||
print("checking if the node is complete or not ", node.complete) | ||
node.changed = False | ||
node.save() | ||
|
||
ContentNode.objects.filter(pk=base_node.pk).update( | ||
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
ContentNode.objects.filter(pk=source_node.pk).update( | ||
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
self.run_migrations() | ||
updated_base_node = ContentNode.objects.get(pk=base_node.pk) | ||
updated_source_node = ContentNode.objects.get(pk=source_node.pk) | ||
self.assertEqual(updated_base_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_base_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_source_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_source_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_source_node.aggregator, self.original_contentnode.aggregator) | ||
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) | ||
|
||
def test_three_node_case_explicit(self): | ||
source_node, source_channel = self.create_source_channel_and_contentnode() | ||
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) | ||
source_node.provider = "luffy" | ||
base_node.provider = "zoro" | ||
base_node.save() | ||
source_node.save() | ||
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) | ||
|
||
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): | ||
print(node.id) | ||
print(node.id == base_channel.main_tree.id) | ||
print("checking if the node is complete or not ", node.complete) | ||
node.changed = False | ||
node.save() | ||
|
||
ContentNode.objects.filter(pk=base_node.pk).update( | ||
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
ContentNode.objects.filter(pk=source_node.pk).update( | ||
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
self.run_migrations() | ||
updated_base_node = ContentNode.objects.get(pk=base_node.pk) | ||
updated_source_node = ContentNode.objects.get(pk=source_node.pk) | ||
self.assertEqual(updated_base_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_base_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_base_node.provider, self.original_contentnode.provider) | ||
self.assertEqual(updated_source_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_source_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_source_node.provider, self.original_contentnode.provider) | ||
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
import datetime | ||
import logging | ||
|
||
from django.core.management.base import BaseCommand | ||
from django.db.models import Exists | ||
from django.db.models import F | ||
from django.db.models import OuterRef | ||
from django.db.models import Q | ||
from django.db.models import Value | ||
from django.db.models.functions import Coalesce | ||
from django.utils import timezone | ||
from django_cte import With | ||
|
||
from contentcuration.models import Channel | ||
from contentcuration.models import ContentNode | ||
from contentcuration.utils.publish import publish_channel | ||
|
||
logger = logging.getLogger(__file__) | ||
|
||
|
||
class Command(BaseCommand): | ||
|
||
def add_arguments(self, parser): | ||
|
||
parser.add_argument( | ||
'--is_test', | ||
action='store_true', | ||
help="Indicate if the command is running in a test environment.", | ||
) | ||
|
||
parser.add_argument( | ||
'--user_id', | ||
type=int, | ||
help="User ID for the operation", | ||
required=True | ||
) | ||
|
||
def handle(self, *args, **options): | ||
# Filter Date : July 9, 2023 | ||
# Link https://github.com/learningequality/studio/pull/4189 | ||
# The PR date for the frontend change is July 10, 2023 | ||
# we would set the filter day one day back just to be sure | ||
|
||
is_test = options['is_test'] | ||
user_id = options['user_id'] | ||
|
||
filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) | ||
main_trees_cte = With( | ||
( | ||
Channel.objects.filter( | ||
main_tree__isnull=False | ||
) | ||
.annotate(channel_id=F("id")) | ||
.values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) | ||
), | ||
name="main_trees", | ||
) | ||
|
||
nodes = main_trees_cte.join( | ||
ContentNode.objects.all(), | ||
tree_id=main_trees_cte.col.tree_id, | ||
).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) | ||
|
||
original_source_nodes = ( | ||
nodes.with_cte(main_trees_cte) | ||
.filter( | ||
node_id=OuterRef("original_source_node_id"), | ||
) | ||
.exclude( | ||
tree_id=OuterRef("tree_id"), | ||
) | ||
.annotate( | ||
coalesced_provider=Coalesce("provider", Value("")), | ||
coalesced_author=Coalesce("author", Value("")), | ||
coalesced_aggregator=Coalesce("aggregator", Value("")), | ||
coalesced_license_id=Coalesce("license_id", -1), | ||
) | ||
) | ||
# We filter out according to last_modified date before this PR | ||
# https://github.com/learningequality/studio/pull/4189 | ||
# As we want to lossen up the public=True Filter and open the | ||
# migration for all the nodes even if they are not published | ||
diff = ( | ||
nodes.with_cte(main_trees_cte).filter( | ||
deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes | ||
source_node_id__isnull=False, | ||
original_source_node_id__isnull=False, | ||
modified__lt=filter_date | ||
) | ||
).annotate( | ||
coalesced_provider=Coalesce("provider", Value("")), | ||
coalesced_author=Coalesce("author", Value("")), | ||
coalesced_aggregator=Coalesce("aggregator", Value("")), | ||
coalesced_license_id=Coalesce("license_id", -1), | ||
) | ||
diff_combined = diff.annotate( | ||
original_source_node_f_changed=Exists( | ||
original_source_nodes.filter( | ||
~Q(coalesced_provider=OuterRef("coalesced_provider")) | ||
| ~Q(coalesced_author=OuterRef("coalesced_author")) | ||
| ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator")) | ||
| ~Q(coalesced_license_id=OuterRef("coalesced_license_id")) | ||
) | ||
) | ||
).filter(original_source_node_f_changed=True) | ||
|
||
final_nodes = diff_combined.values( | ||
"id", | ||
"channel_id", | ||
"original_channel_id", | ||
"original_source_node_id", | ||
"coalesced_provider", | ||
"coalesced_author", | ||
"coalesced_aggregator", | ||
"coalesced_license_id", | ||
"original_source_node_f_changed", | ||
).order_by() | ||
|
||
for item in final_nodes: | ||
base_node = ContentNode.objects.get(pk=item["id"]) | ||
|
||
original_source_channel_id = item["original_channel_id"] | ||
original_source_node_id = item["original_source_node_id"] | ||
tree_id = ( | ||
Channel.objects.filter(pk=original_source_channel_id) | ||
.values_list("main_tree__tree_id", flat=True) | ||
.get() | ||
) | ||
original_source_node = ContentNode.objects.filter( | ||
tree_id=tree_id, node_id=original_source_node_id | ||
) | ||
|
||
base_channel = Channel.objects.get(pk=item['channel_id']) | ||
|
||
to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) | ||
|
||
if original_source_channel_id is not None and original_source_node.exists(): | ||
# original source node exists and its source fields dont match | ||
# update the base node | ||
if base_node.author != original_source_node[0].author: | ||
base_node.author = original_source_node[0].author | ||
if base_node.provider != original_source_node[0].provider: | ||
base_node.provider = original_source_node[0].provider | ||
if base_node.aggregator != original_source_node[0].aggregator: | ||
base_node.aggregator = original_source_node[0].aggregator | ||
if base_node.license != original_source_node[0].license: | ||
base_node.license = original_source_node[0].license | ||
base_node.save() | ||
if to_be_republished and base_channel.last_published is not None: | ||
ozer550 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# we would repbulish the channel | ||
# Adding for testing | ||
if is_test: | ||
publish_channel(user_id, base_channel.id) | ||
else: | ||
publish_channel("SOME ID", base_channel.id, base_channel.id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My only uncertainty here is whether we should be running the publish as the user whose channel it is, or as some administrator. The administrator means that we can always reliably run the publish as the same user, but does mean that it now appears that someone else has published the channel. The only other thing that comes to mind is that if we ran as the channel editor, they would receive an email indicating to them that their channel had been republished. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The email draft that will be sent by imps team mentions that "WE" would be the one doing the change so the administrator seems more reliable option here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Altho this email is not sent to editors of channels which are not public, so they might be confused about whats happening. When as channel editor the sending email event can be avoided, as by default |
||
else: | ||
continue |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In case this helps this feel less surprising: https://github.com/learningequality/studio/blob/unstable/contentcuration/contentcuration/models.py#L1831
We have an explicit exclude list of fields for which updates to them do not trigger change to be set as
True
.