From 8aac1747732efe878ab1747fb777be017724daf3 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 5 Jun 2024 14:41:50 +0000 Subject: [PATCH 1/3] management: add atomic transaction to populate diff_id fields --- .../main_app/management/commands/populate_diff_id_fields.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py b/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py index c6d0a099a..0c623bc04 100644 --- a/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py +++ b/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py @@ -2,9 +2,11 @@ from django.core.management.base import BaseCommand from django.db.models import Q from typing import Optional +from django.db import transaction class Command(BaseCommand): + @transaction.atomic def handle(self, *args, **kwargs): CHUNK_SIZE = 500 chants = Chant.objects.filter( From ae31659e19c5eb68392118bacb856941d84e79ae Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 5 Jun 2024 16:51:01 +0000 Subject: [PATCH 2/3] management: include iterator for chant queryset --- .../commands/populate_diff_id_fields.py | 59 +++++++++---------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py b/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py index 0c623bc04..f53eb9dc3 100644 --- a/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py +++ b/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py @@ -8,40 +8,39 @@ class Command(BaseCommand): @transaction.atomic def handle(self, *args, **kwargs): - CHUNK_SIZE = 500 chants = Chant.objects.filter( Q(differentiae_database__isnull=False) & Q(diff_db__isnull=True) - ) - chants_count = chants.count() - start_index = 0 + ).iterator(chunk_size=500) + chants_total = 
Chant.objects.filter( + Q(differentiae_database__isnull=False) & Q(diff_db__isnull=True) + ).count() count = 0 - while start_index <= chants_count: - self.stdout.write(f"processing chunk with {start_index=}") - chunk = chants[start_index : start_index + CHUNK_SIZE] - - for chant in chunk: - try: - differentia_id: Optional[str] = chant.differentiae_database - differentia = Differentia.objects.get(differentia_id=differentia_id) - if differentia: - chant.diff_db = differentia - else: - # If the Differentia doesn't exist, create a new one - differentia = Differentia( - differentia_id=differentia_id, - ) - differentia.save() - chant.diff_db = differentia - chant.save() - except Differentia.DoesNotExist: - print(f"Differentia not found for chant: {chant}") - count += 1 - if count % 100 == 0: - print( - f"------------------ {count} of {chants_count} chants updated ------------------" + chant: Chant + for chant in chants: + try: + differentia_id: Optional[str] = chant.differentiae_database + differentia = Differentia.objects.get(differentia_id=differentia_id) + if differentia: + chant.diff_db = differentia + else: + # If the Differentia doesn't exist, create a new one + differentia = Differentia( + differentia_id=differentia_id, + ) + differentia.save() + chant.diff_db = differentia + chant.save() + except Differentia.DoesNotExist: + self.stdout.write( + self.style.WARNING(f"Differentia not found for chant: {chant}") + ) + count += 1 + if count % 100 == 0: + self.stdout.write( + self.style.SUCCESS( + f"------------------ {count} of {chants_total} chants updated ------------------" ) - del chunk # make sure we don't use too much RAM - start_index += CHUNK_SIZE + ) self.stdout.write( self.style.SUCCESS("Success! 
Command has run to completion.\n") From 390cb57a6d7b6da7018371660998efa444b83d89 Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 6 Aug 2024 16:39:42 +0000 Subject: [PATCH 3/3] fix(management): ensure Differentia is created if not found - in populate_diff_id_fields.py - removed obsolete else block --- .../commands/populate_diff_id_fields.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py b/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py index f53eb9dc3..b99fb6b09 100644 --- a/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py +++ b/django/cantusdb_project/main_app/management/commands/populate_diff_id_fields.py @@ -15,25 +15,22 @@ def handle(self, *args, **kwargs): Q(differentiae_database__isnull=False) & Q(diff_db__isnull=True) ).count() count = 0 - chant: Chant + for chant in chants: try: differentia_id: Optional[str] = chant.differentiae_database differentia = Differentia.objects.get(differentia_id=differentia_id) - if differentia: - chant.diff_db = differentia - else: - # If the Differentia doesn't exist, create a new one - differentia = Differentia( - differentia_id=differentia_id, - ) - differentia.save() - chant.diff_db = differentia - chant.save() except Differentia.DoesNotExist: + # If the Differentia doesn't exist, create a new one + differentia = Differentia(differentia_id=differentia_id) + differentia.save() self.stdout.write( - self.style.WARNING(f"Differentia not found for chant: {chant}") + self.style.WARNING(f"Differentia created for chant: {chant}") ) + + chant.diff_db = differentia + chant.save() + count += 1 if count % 100 == 0: self.stdout.write(