Skip to content

Commit

Permalink
Merge branch 'beta' into require-auth-at-project-level
Browse files Browse the repository at this point in the history
  • Loading branch information
noliveleger committed Nov 23, 2023
2 parents 4c6bb93 + a7e6380 commit 87054fb
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

from datetime import timedelta

from django.core.management.base import BaseCommand
from django.db.models import F

from onadata.apps.logger.models import (
Attachment,
Expand All @@ -19,35 +22,107 @@ def add_arguments(self, parser):
'--chunks',
type=int,
default=2000,
help="Number of records to process per query"
help='Number of records to process per query'
)

parser.add_argument(
'--start-id',
type=int,
default=0,
help='Instance ID to start from'
)

parser.add_argument(
'--start-date',
type=str,
default=None,
help='Starting date to start from. Format: yyyy-mm-aa.'
)

parser.add_argument(
'--not-edited-offset',
type=int,
default=10,
help=(
'Offset in seconds between creation date and modification date '
'to consider submissions as not edited'
)
)

def handle(self, *args, **kwargs):
chunks = kwargs['chunks']
verbosity = kwargs['verbosity']
start_id = kwargs['start_id']
start_date = kwargs['start_date']
offset = kwargs['not_edited_offset']

self.stdout.write(
'⚠ Warning! This management can take a while (i.e. several days) '
'to run on big databases'
)

instance_ids = Attachment.objects.values_list(
'instance_id', flat=True
).distinct()

queryset = Attachment.objects.values_list('instance_id', flat=True).distinct()
if start_id:
instance_ids = instance_ids.filter(instance_id__gte=start_id)

queryset = (
Instance.objects.only('xml', 'xform')
.filter(pk__in=instance_ids)
.exclude(
date_modified__lt=F('date_created')
+ timedelta(seconds=offset),
)
)

if start_id:
queryset = queryset.filter(pk__gte=start_id)

if start_date:
queryset = queryset.filter(date_created__date__gte=start_date)

if verbosity > 1:
self.stdout.write(
f'Calculating number of instance with attachments…'
f'Calculating number of Instance objects with attachments…'
)
instances_count = queryset.count()

cpt = 1
queryset = queryset.order_by('pk')

if verbosity > 1:
self.stdout.write(
f'Retrieving Instance objects…'
)

for instance_id in queryset.iterator(chunk_size=chunks):
instance = Instance.objects.get(pk=instance_id)
for instance in queryset.iterator(chunk_size=chunks):
if verbosity > 0:
message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}'
self.stdout.write(
f'Processing Instance object #{instance_id}{message}…'
f'Processing Instance object #{instance.pk}{message}…'
)
soft_deleted_attachments = get_soft_deleted_attachments(instance)

try:
soft_deleted_attachments = get_soft_deleted_attachments(instance)
except Exception as e:
cpt += 1
if verbosity > 0:
self.stderr.write(f'\tError: {str(e)}')
continue

for soft_deleted_attachment in soft_deleted_attachments:
# Avoid fetching Instance object once again
soft_deleted_attachment.instance = instance
pre_delete_attachment(
soft_deleted_attachment, only_update_counters=True
)

if verbosity > 1:
message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}'
self.stdout.write(
f'\tInstance object #{instance.pk}{message} updated!'
)
cpt += 1

self.stdout.write('Done!')
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from __future__ import annotations

from django.core.management.base import BaseCommand
from django.core.management import call_command
from django.db.models import F, Q

from onadata.apps.logger.models import (
Attachment,
Instance,
XForm,
)
from onadata.apps.main.models import UserProfile


class Command(BaseCommand):
    """
    Restore background-audio recordings and audit logs that were soft-deleted
    by mistake, then resynchronize the attachment storage counters.

    The command clears `deleted_at` on the matching attachments of every
    affected instance, refreshes the MongoDB copy of the submission and
    finally delegates to `update_attachment_storage_bytes --sync`.
    """

    help = (
        'Undelete background audio files and audit logs previously soft-deleted'
        ' by a bug introduced in release 2.023.37c'
    )

    # Single definition of the attachments this command is allowed to touch:
    # `audit.csv`, anything ending with `.enc`, and `.m4a`/`.amr` files whose
    # name is made of at least 10 digits (presumably timestamp-named
    # background recordings - TODO confirm).
    # The same filter must be used for detection AND for the undelete itself,
    # otherwise the command could restore attachments it never selected
    # (e.g. a legitimately soft-deleted `1.m4a`).
    AFFECTED_FILES_FILTER = (
        Q(media_file_basename='audit.csv')
        | Q(media_file_basename__endswith='.enc')
        | Q(media_file_basename__regex=r'^\d{10,}\.(m4a|amr)$')
    )

    def add_arguments(self, parser):
        """Register the `--chunks` and `--force` command line options."""
        parser.add_argument(
            '--chunks',
            type=int,
            default=2000,
            help='Number of records to process per query'
        )

        parser.add_argument(
            '--force',
            action='store_true',
            default=False,
            help='Run the management command even if no attachments are affected'
        )

    def handle(self, *args, **kwargs):
        """
        Undelete the affected attachments instance by instance.

        Without `--force`, only instances owning at least one soft-deleted
        matching attachment are processed, and the command exits early when
        there are none. With `--force`, every instance owning a matching
        attachment is processed, soft-deleted or not.
        """
        chunks = kwargs['chunks']
        verbosity = kwargs['verbosity']
        force = kwargs['force']

        self.stdout.write(
            '⚠ Warning! This management can take a while (i.e. several hours) '
            'to run on big databases'
        )

        affected = Attachment.all_objects.filter(self.AFFECTED_FILES_FILTER)
        soft_deleted = affected.filter(deleted_at__isnull=False)

        # `force` is tested first so the EXISTS query is skipped when its
        # result would be ignored anyway.
        if not force and not soft_deleted.exists():
            self.stdout.write(
                'No background recording or audit logs seem to be affected'
            )
            return

        att_queryset = affected if force else soft_deleted
        instance_ids = list(
            att_queryset.values_list('instance_id', flat=True).distinct()
        )
        # Always bound (it is free on a list), so progress messages can never
        # hit an undefined name whatever the verbosity is.
        instances_count = len(instance_ids)

        if verbosity > 1:
            self.stdout.write(f'Instances to process: {instances_count}…')

        instances = Instance.objects.filter(pk__in=instance_ids).order_by('id')
        for cpt, instance in enumerate(
            instances.iterator(chunk_size=chunks), start=1
        ):
            if verbosity:
                message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}'
                self.stdout.write(
                    f'Processing instance #{instance.pk}{message}…'
                )

            Attachment.all_objects.filter(
                self.AFFECTED_FILES_FILTER,
                instance_id=instance.pk,
            ).update(deleted_at=None)

            try:
                instance.parsed_instance.update_mongo()
            except Instance.parsed_instance.RelatedObjectDoesNotExist:
                # Best effort: an instance without a parsed instance has
                # nothing to refresh. Report it instead of hiding it.
                if verbosity > 1:
                    self.stdout.write(
                        f'\tInstance #{instance.pk} has no parsed instance, '
                        f'MongoDB not updated'
                    )

        if verbosity:
            self.stdout.write('Updating storage counters…')

        # Attachment storage counters need to be updated.
        xform_ids = (
            Instance.objects.filter(pk__in=instance_ids)
            .values_list('xform_id', flat=True)
            .distinct()
        )

        # Update related profile counters with a wrong value to let
        # the management command `update_attachment_storage_bytes` find them
        # when calling with `--sync` option.
        UserProfile.objects.filter(
            user_id__in=XForm.objects.filter(
                pk__in=list(xform_ids)
            ).values_list('user_id', flat=True)
        ).update(attachment_storage_bytes=F('attachment_storage_bytes') - 1)

        call_command(
            'update_attachment_storage_bytes', verbosity=verbosity, sync=True
        )

        self.stdout.write('Done!')
2 changes: 1 addition & 1 deletion onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ class Instance(models.Model):
default='submitted_via_web')
uuid = models.CharField(max_length=249, default='', db_index=True)

# store an geographic objects associated with this instance
# store geographic objects associated with this instance
geom = models.GeometryCollectionField(null=True)

tags = TaggableManager()
Expand Down
14 changes: 10 additions & 4 deletions onadata/apps/logger/models/xform.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import os
import re
from copy import deepcopy
from io import BytesIO
from xml.sax.saxutils import escape as xml_escape

Expand Down Expand Up @@ -132,10 +133,15 @@ def url(self):
}
)

def data_dictionary(self):
from onadata.apps.viewer.models.data_dictionary import\
DataDictionary
return DataDictionary.all_objects.get(pk=self.pk)
def data_dictionary(self, use_cache: bool = False):
from onadata.apps.viewer.models.data_dictionary import DataDictionary

if not use_cache:
return DataDictionary.all_objects.get(pk=self.pk)

xform_dict = deepcopy(self.__dict__)
xform_dict.pop('_state', None)
return DataDictionary(**xform_dict)

@property
def has_instances_with_geopoints(self):
Expand Down
3 changes: 1 addition & 2 deletions onadata/apps/logger/xform_instance_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,9 @@ def get_xform_media_question_xpaths(
xform: 'onadata.apps.logger.models.XForm',
) -> list:
logger = logging.getLogger('console_logger')
parser = XFormInstanceParser(xform.xml, xform.data_dictionary())
parser = XFormInstanceParser(xform.xml, xform.data_dictionary(use_cache=True))
all_attributes = _get_all_attributes(parser.get_root_node())
media_field_xpaths = []

# This code expects that the attributes from Enketo Express are **always**
# sent in the same order.
# For example:
Expand Down
17 changes: 17 additions & 0 deletions onadata/apps/main/migrations/0013_remove_userprofile_created_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 3.2.15 on 2023-10-30 16:38

from django.db import migrations


class Migration(migrations.Migration):
    """Drop the `created_by` field from the `userprofile` model."""

    # Must run after the previous migration of the `main` app.
    dependencies = [('main', '0012_add_validate_password_flag_to_profile')]

    operations = [
        migrations.RemoveField(model_name='userprofile', name='created_by'),
    ]
4 changes: 0 additions & 4 deletions onadata/apps/main/models/user_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class UserProfile(models.Model):
require_auth = models.BooleanField(default=True)
address = models.CharField(max_length=255, blank=True)
phonenumber = models.CharField(max_length=30, blank=True)
created_by = models.ForeignKey(User, null=True, blank=True, on_delete=models.CASCADE)
num_of_submissions = models.IntegerField(default=0)
attachment_storage_bytes = models.BigIntegerField(default=0)
metadata = models.JSONField(default=dict, blank=True)
Expand Down Expand Up @@ -80,9 +79,6 @@ def set_object_permissions(sender, instance=None, created=False, **kwargs):
for perm in get_perms_for_model(UserProfile):
assign_perm(perm.codename, instance.user, instance)

if instance.created_by:
assign_perm(perm.codename, instance.created_by, instance)


post_save.connect(set_object_permissions, sender=UserProfile,
dispatch_uid='set_object_permissions')
Expand Down
12 changes: 9 additions & 3 deletions onadata/libs/utils/logger_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,9 +735,15 @@ def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]:

# Update Attachment objects to hide them if they are not used anymore.
# We do not want to delete them until the instance itself is deleted.
queryset = Attachment.objects.filter(
instance=instance
).exclude(media_file_basename__in=basenames)

# FIXME Temporary hack to leave background-audio files and audit files alone
# Bug comes from `get_xform_media_question_xpaths()`
queryset = Attachment.objects.filter(instance=instance).exclude(
Q(media_file_basename__in=basenames)
| Q(media_file_basename__endswith='.enc')
| Q(media_file_basename='audit.csv')
| Q(media_file_basename__regex=r'^\d{10,}\.(m4a|amr)$')
)
soft_deleted_attachments = list(queryset.all())
queryset.update(deleted_at=timezone.now())

Expand Down

0 comments on commit 87054fb

Please sign in to comment.