diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_children.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_children.csv new file mode 100644 index 000000000..88bc23621 --- /dev/null +++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_children.csv @@ -0,0 +1,3 @@ +children/childs_name,children/childs_age,_id,_uuid,_submission_time,_index,_parent_table_name,_parent_index,_tags,_notes +Tom,12,,,,1,tutorial_w_repeats,1,, +Dick,5,,,,2,tutorial_w_repeats,1,, diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_choices.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_choices.csv new file mode 100644 index 000000000..651494261 --- /dev/null +++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_choices.csv @@ -0,0 +1,8 @@ +list name,name,label +yes_no,0,no +yes_no,1,yes +,, +browsers,firefox,Mozilla Firefox +browsers,chrome,Google Chrome +browsers,ie,Internet Explorer +browsers,safari,Safari diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_flattened_raw.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_flattened_raw.csv new file mode 100644 index 000000000..47d193a1b --- /dev/null +++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_flattened_raw.csv @@ -0,0 +1,2 @@ +name,age,picture,has_children,children[1]/childs_age,children[1]/childs_name,children[2]/childs_age,children[2]/childs_name,gps,_gps_latitude,_gps_longitude,_gps_altitude,_gps_precision,web_browsers,meta/instanceID,_uuid,_submission_time,_tags,_notes +Bob,25,,1,12,Tom,5,Dick,-1.2625621 36.7921711 0.0 20.0,-1.2625621,36.7921711,0.0,20.0,,uuid:b31c6ac2-b8ca-4180-914f-c844fa10ed3b,b31c6ac2-b8ca-4180-914f-c844fa10ed3b,2013-02-18T15:54:01,, diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_survey.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_survey.csv new file mode 100644 index 000000000..ac02574dc --- /dev/null +++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_survey.csv @@ -0,0 +1,11 @@ +type,name,label +text,name,1. What is your name? +integer,age,2. How old are you? +image,picture,3. May I take your picture? +select one from yes_no,has_children,4. Do you have any children? +begin repeat,children,5. Children +text,childs_name,5.1 Childs name? +integer,childs_age,5.2 Childs age? +end repeat,, +geopoint,gps,5. Record your GPS coordinates. +select all that apply from browsers,web_browsers,6. What web browsers do you use? 
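The fixtures above encode the repeat-group linkage the export builder emits: each row in expected_children.csv carries its own _index plus the _parent_table_name/_parent_index of the submission row it belongs to. A minimal sketch of that convention, reading the fixture with Python's csv module (illustrative only, not part of the patch):

    import csv

    # Parent row 1 lives in the tutorial_w_repeats sheet; both child rows
    # (Tom and Dick) point back to it via _parent_table_name/_parent_index.
    with open('expected_children.csv') as f:
        for row in csv.DictReader(f):
            assert row['_parent_table_name'] == 'tutorial_w_repeats'
            assert row['_parent_index'] == '1'  # Bob is submission _index 1
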
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_tutorial_w_repeats.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_tutorial_w_repeats.csv
new file mode 100644
index 000000000..45674287a
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_tutorial_w_repeats.csv
@@ -0,0 +1,2 @@
+name,age,picture,has_children,gps,_gps_latitude,_gps_longitude,_gps_altitude,_gps_precision,web_browsers,web_browsers/firefox,web_browsers/chrome,web_browsers/ie,web_browsers/safari,meta/instanceID,_id,_uuid,_submission_time,_index,_parent_table_name,_parent_index,_tags,_notes
+Bob,25,,1,-1.2625621 36.7921711 0.0 20.0,-1.2625621,36.7921711,0.0,20.0,,,,,,uuid:b31c6ac2-b8ca-4180-914f-c844fa10ed3b,###EXPORT_ID###,b31c6ac2-b8ca-4180-914f-c844fa10ed3b,2013-02-18T15:54:01,1,,-1,,
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xls b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xls
new file mode 100755
index 000000000..71aea101d
Binary files /dev/null and b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xls differ
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xml b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xml
new file mode 100644
index 000000000..a68cbc554
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xml
@@ -0,0 +1 @@
+<tutorial_w_repeats id="tutorial_w_repeats"><name>Bob</name><age>25</age><picture/><has_children>1</has_children><children><childs_name>Tom</childs_name><childs_age>12</childs_age></children><children><childs_name>Dick</childs_name><childs_age>5</childs_age></children><gps>-1.2625621 36.7921711 0.0 20.0</gps><web_browsers/><meta><instanceID>uuid:b31c6ac2-b8ca-4180-914f-c844fa10ed3b</instanceID></meta></tutorial_w_repeats>
diff --git a/onadata/apps/main/tests/test_google_docs_export.py b/onadata/apps/main/tests/test_google_docs_export.py
deleted file mode 100644
index 16abdf82d..000000000
--- a/onadata/apps/main/tests/test_google_docs_export.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from django.core.urlresolvers import reverse
-from mock import Mock, patch
-
-from onadata.apps.main.google_export import refresh_access_token
-from onadata.apps.main.models import TokenStorageModel
-from onadata.apps.viewer.views import google_xls_export
-from onadata.libs.utils.google import oauth2_token as token
-from test_base import TestBase
-
-
-class TestGoogleDocsExport(TestBase):
-
-    def setUp(self):
-        self.token = token
-        self.refresh_token = '1/ISGBd-OBWr-RbXN2Fq879Xht1inmg_n4sJ_Wd4CoQNk'
-        self.token.refresh_token = self.refresh_token
-        self._create_user_and_login()
-
-    @patch('gdata.docs.client.DocsClient')
-    @patch('urllib2.urlopen')
-    def test_google_docs_export(self, mock_urlopen, mock_docs_client):
-        mock_urlopen.return_value.read.return_value = '{"access_token": "ABC"}'
-        mock_xls_doc = Mock()
-        mock_xls_doc.find_html_link.return_value = 'link'
-        mock_docs_client = Mock()
-        mock_docs_client.CreateResource.return_value = mock_xls_doc
-        mock_docs_client.return_value = mock_docs_client
-
-        self._publish_transportation_form()
-        self._make_submissions()
-
-        initial_token_count = TokenStorageModel.objects.all().count()
-        self.token = refresh_access_token(self.token, self.user)
-
-        self.assertIsNotNone(self.token.access_token)
-        self.assertEqual(
-            TokenStorageModel.objects.all().count(), initial_token_count + 1)
-
-        response = self.client.get(reverse(google_xls_export, kwargs={
-            'username': self.user.username,
-            'id_string': self.xform.id_string
-        }))
-        self.assertEqual(response.status_code, 302)
-        # share the data, log out, and check the export
-        self._share_form_data()
-        self._logout()
-        response = self.client.get(reverse(google_xls_export, kwargs={
-            'username':
self.user.username, - 'id_string': self.xform.id_string - })) - self.assertEqual(response.status_code, 302) diff --git a/onadata/apps/main/tests/test_google_sheets_export.py b/onadata/apps/main/tests/test_google_sheets_export.py new file mode 100644 index 000000000..8ea750ad8 --- /dev/null +++ b/onadata/apps/main/tests/test_google_sheets_export.py @@ -0,0 +1,157 @@ +import csv +import os + +import gdata.gauth + +from django.conf import settings +from django.core.files.storage import get_storage_class +from django.core.files.temp import NamedTemporaryFile +from django.core.urlresolvers import reverse +from django.utils.dateparse import parse_datetime +from mock import Mock, patch + +from onadata.apps.viewer.models.export import Export +from onadata.libs.utils.export_tools import generate_export +from onadata.libs.utils.google import oauth2_token +from onadata.libs.utils.google_sheets import SheetsClient +from test_base import TestBase + + + +class MockCell(): + def __init__(self, row, col, value): + self.row = row + self.col = col + self.value = value + + +class TestExport(TestBase): + + def setUp(self): + # Prepare a fake token. + self.token = oauth2_token + self.token.refresh_token = 'foo' + self.token.access_token = 'bar' + self.token_blob = gdata.gauth.token_to_blob(self.token) + + # Files that contain the expected spreadsheet data. + self.fixture_dir = os.path.join( + self.this_directory, 'fixtures', 'google_sheets_export') + + # Create a test user and login. + self._create_user_and_login() + + # Create a test submission. + path = os.path.join(self.fixture_dir, 'tutorial_w_repeats.xls') + self._publish_xls_file_and_set_xform(path) + path = os.path.join(self.fixture_dir, 'tutorial_w_repeats.xml') + self._submission_time = parse_datetime('2013-02-18 15:54:01Z') + self._make_submission( + path, forced_submission_time=self._submission_time) + + + def _mock_worksheet(self, csv_writer): + """Creates a mock worksheet object with append_row and insert_row + methods writing to csv_writer.""" + worksheet = Mock() + worksheet.append_row.side_effect = \ + lambda values: csv_writer.writerow(values) + def create_cell(r, c): + return MockCell(r, c, None) + worksheet.cell.side_effect = create_cell + worksheet.update_cells.side_effect = \ + lambda cells: csv_writer.writerow([cell.value for cell in cells]) + worksheet.insert_row.side_effect = \ + lambda values, index: csv_writer.writerow(values) + return worksheet + + def _mock_spreadsheet(self, csv_writers): + spreadsheet = Mock() + spreadsheet.add_worksheet.side_effect = \ + [self._mock_worksheet(writer) for writer in csv_writers] + return spreadsheet + + def _setup_result_files(self, expected_file_names): + expected_files = [open(os.path.join(self.fixture_dir, f)) + for f in expected_file_names] + result_files = [NamedTemporaryFile() for f in expected_file_names] + csv_writers = [csv.writer(f, lineterminator='\n') for f in result_files] + return expected_files, result_files, csv_writers + + def assertStorageExists(self, export): + storage = get_storage_class()() + self.assertTrue(storage.exists(export.filepath)) + _, ext = os.path.splitext(export.filename) + self.assertEqual(ext, '.gsheets') + + def assertEqualExportFiles(self, expected_files, result_files, export): + for result, expected in zip(result_files, expected_files): + result.flush() + result.seek(0) + expected_content = expected.read() + # Fill in the actual export id (varies based on test order) + expected_content = expected_content.replace('###EXPORT_ID###', + str(export.id)) + 
result_content = result.read()
+            self.assertEqual(result_content, expected_content)
+
+
+    @patch.object(SheetsClient, 'new')
+    @patch.object(SheetsClient, 'add_service_account_to_spreadsheet')
+    @patch.object(SheetsClient, 'get_worksheets_feed')
+    @patch('urllib2.urlopen')
+    def test_gsheets_export_output(self,
+                                   mock_urlopen,
+                                   mock_get_worksheets,
+                                   mock_account_add_service_account,
+                                   mock_new):
+        expected_files, result_files, csv_writers = self._setup_result_files(
+            ['expected_tutorial_w_repeats.csv',
+             'expected_children.csv',
+             'expected_survey.csv',
+             'expected_choices.csv'])
+        mock_urlopen.return_value.read.return_value = '{"access_token": "baz"}'
+        mock_new.return_value = self._mock_spreadsheet(csv_writers)
+
+        # Test Google Sheets export.
+        export = generate_export(export_type=Export.GSHEETS_EXPORT,
+                                 extension='gsheets',
+                                 username=self.user.username,
+                                 id_string='tutorial_w_repeats',
+                                 split_select_multiples=True,
+                                 binary_select_multiples=False,
+                                 google_token=self.token_blob,
+                                 flatten_repeated_fields=False,
+                                 export_xlsform=True)
+        self.assertStorageExists(export)
+        self.assertEqualExportFiles(expected_files, result_files, export)
+
+
+    @patch.object(SheetsClient, 'new')
+    @patch.object(SheetsClient, 'add_service_account_to_spreadsheet')
+    @patch.object(SheetsClient, 'get_worksheets_feed')
+    @patch('urllib2.urlopen')
+    def test_gsheets_export_flattened_output(self,
+                                             mock_urlopen,
+                                             mock_get_worksheets,
+                                             mock_account_add_service_account,
+                                             mock_new):
+        expected_files, result_files, csv_writers = self._setup_result_files(
+            ['expected_flattened_raw.csv'])
+        mock_urlopen.return_value.read.return_value = '{"access_token": "baz"}'
+        mock_spreadsheet = self._mock_spreadsheet(csv_writers)
+        mock_new.return_value = mock_spreadsheet
+
+        # Test Google Sheets export.
+ export = generate_export(export_type=Export.GSHEETS_EXPORT, + extension='gsheets', + username=self.user.username, + id_string='tutorial_w_repeats', + split_select_multiples=False, + binary_select_multiples=False, + google_token=self.token_blob, + flatten_repeated_fields=True, + export_xlsform=False) + self.assertStorageExists(export) + self.assertEqualExportFiles(expected_files, result_files, export) + \ No newline at end of file diff --git a/onadata/apps/main/urls.py b/onadata/apps/main/urls.py index 64a0cf779..df161f113 100644 --- a/onadata/apps/main/urls.py +++ b/onadata/apps/main/urls.py @@ -152,8 +152,6 @@ 'onadata.apps.viewer.views.kml_export'), url(r"^(?P\w+)/forms/(?P[^/]+)/data\.zip", 'onadata.apps.viewer.views.zip_export'), - url(r"^(?P\w+)/forms/(?P[^/]+)/gdocs$", - 'onadata.apps.viewer.views.google_xls_export'), url(r"^(?P\w+)/forms/(?P[^/]+)/map_embed", 'onadata.apps.viewer.views.map_embed_view'), url(r"^(?P\w+)/forms/(?P[^/]+)/map", diff --git a/onadata/apps/viewer/models/export.py b/onadata/apps/viewer/models/export.py index 660910dac..1516edd49 100644 --- a/onadata/apps/viewer/models/export.py +++ b/onadata/apps/viewer/models/export.py @@ -28,7 +28,7 @@ def __str__(self): CSV_EXPORT = 'csv' KML_EXPORT = 'kml' ZIP_EXPORT = 'zip' - GDOC_EXPORT = 'gdoc' + GSHEETS_EXPORT = 'gsheets' CSV_ZIP_EXPORT = 'csv_zip' SAV_ZIP_EXPORT = 'sav_zip' SAV_EXPORT = 'sav' @@ -48,7 +48,7 @@ def __str__(self): EXPORT_TYPES = [ (XLS_EXPORT, 'Excel'), (CSV_EXPORT, 'CSV'), - (GDOC_EXPORT, 'GDOC'), + (GSHEETS_EXPORT, 'Google Sheets'), (ZIP_EXPORT, 'ZIP'), (KML_EXPORT, 'kml'), (CSV_ZIP_EXPORT, 'CSV ZIP'), diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py index d466b10ab..d632f44b6 100644 --- a/onadata/apps/viewer/tasks.py +++ b/onadata/apps/viewer/tasks.py @@ -30,9 +30,8 @@ def _create_export(xform, export_type): 'export_id': export.id, 'query': query, } - if export_type in [Export.XLS_EXPORT, Export.GDOC_EXPORT, - Export.CSV_EXPORT, Export.CSV_ZIP_EXPORT, - Export.SAV_ZIP_EXPORT]: + if export_type in [Export.XLS_EXPORT, Export.CSV_EXPORT, + Export.CSV_ZIP_EXPORT, Export.SAV_ZIP_EXPORT]: if options and "group_delimiter" in options: arguments["group_delimiter"] = options["group_delimiter"] if options and "split_select_multiples" in options: @@ -43,7 +42,7 @@ def _create_export(xform, export_type): options["binary_select_multiples"] # start async export - if export_type in [Export.XLS_EXPORT, Export.GDOC_EXPORT]: + if export_type == Export.XLS_EXPORT: result = create_xls_export.apply_async((), arguments, countdown=10) elif export_type == Export.CSV_EXPORT: result = create_csv_export.apply_async( @@ -56,6 +55,23 @@ def _create_export(xform, export_type): (), arguments, countdown=10) else: raise Export.ExportTypeError + elif export_type == Export.GSHEETS_EXPORT: + if options and "group_delimiter" in options: + arguments["group_delimiter"] = options["group_delimiter"] + if options and "split_select_multiples" in options: + arguments["split_select_multiples"] =\ + options["split_select_multiples"] + if options and "binary_select_multiples" in options: + arguments["binary_select_multiples"] =\ + options["binary_select_multiples"] + if options and "google_token" in options: + arguments["google_token"] = options["google_token"] + if options and "flatten_repeated_fields" in options: + arguments["flatten_repeated_fields"] =\ + options["flatten_repeated_fields"] + if options and "export_xlsform" in options: + arguments["export_xlsform"] = options["export_xlsform"] + result = 
create_gsheets_export.apply_async((), arguments, countdown=10)
     elif export_type == Export.ZIP_EXPORT:
         # start async export
         result = create_zip_export.apply_async(
@@ -125,6 +141,44 @@ def create_xls_export(username, id_string, export_id, query=None,
     else:
         return gen_export.id
 
+
+@task()
+def create_gsheets_export(
+        username, id_string, export_id, query=None, group_delimiter='/',
+        split_select_multiples=True, binary_select_multiples=False,
+        google_token=None, flatten_repeated_fields=True, export_xlsform=True):
+    # we re-query the db instead of passing model objects according to
+    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#state
+    try:
+        export = Export.objects.get(id=export_id)
+    except Export.DoesNotExist:
+        # no export for this ID, return None.
+        return None
+
+    # though export is not available when a form has 0 submissions, we
+    # catch this since it potentially stops celery
+    try:
+        gen_export = generate_export(
+            Export.GSHEETS_EXPORT, None, username, id_string, export_id, query,
+            group_delimiter, split_select_multiples, binary_select_multiples,
+            google_token, flatten_repeated_fields, export_xlsform)
+    except (Exception, NoRecordsFoundError) as e:
+        export.internal_status = Export.FAILED
+        export.save()
+        # mail admins
+        details = {
+            'export_id': export_id,
+            'username': username,
+            'id_string': id_string
+        }
+        report_exception("Google Sheets Export Exception: Export ID - "
+                         "%(export_id)s, /%(username)s/%(id_string)s"
+                         % details, e, sys.exc_info())
+        # Raise for now to let celery know we failed
+        # - doesn't seem to break celery
+        raise
+    else:
+        return gen_export.id
+
+
 @task()
 def create_csv_export(username, id_string, export_id, query=None,
diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py
index fd64b150c..217b83f6e 100644
--- a/onadata/apps/viewer/views.py
+++ b/onadata/apps/viewer/views.py
@@ -34,7 +34,7 @@
     kml_export_data, newset_export_for)
 from onadata.libs.utils.image_tools import image_url
-from onadata.libs.utils.google import google_export_xls, redirect_uri
+from onadata.libs.utils.google import redirect_uri
 from onadata.libs.utils.log import audit_log, Actions
 from onadata.libs.utils.logger_tools import response_with_mimetype_and_name,\
     disposition_ext_and_date
@@ -298,7 +298,18 @@ def create_export(request, username, id_string, export_type):
     xform = get_object_or_404(XForm, id_string__exact=id_string, user=owner)
     if not has_permission(xform, owner, request):
         return HttpResponseForbidden(_(u'Not shared.'))
-
+
+    token = None
+    if export_type == Export.GSHEETS_EXPORT:
+        redirect_url = reverse(
+            create_export,
+            kwargs={
+                'username': username, 'id_string': id_string,
+                'export_type': export_type})
+        token = _get_google_token(request, redirect_url)
+        if isinstance(token, HttpResponse):
+            return token
+
     if export_type == Export.EXTERNAL_EXPORT:
         # check for template before trying to generate a report
         if not MetaData.external_export(xform=xform):
@@ -313,11 +324,10 @@
         return HttpResponseBadRequest(
             _("%s is not a valid delimiter" % group_delimiter))
 
-    # default is True, so when dont_..
is yes - # split_select_multiples becomes False - split_select_multiples = request.POST.get( - "options[dont_split_select_multiples]", "no") == "no" - + split_select_multiples = "options[split_select_multiples]" in request.POST + flatten_repeated_fields = "options[flatten_repeated_fields]" in request.POST + export_xlsform = "options[export_xlsform]" in request.POST + binary_select_multiples = getattr(settings, 'BINARY_SELECT_MULTIPLES', False) # external export option @@ -326,9 +336,13 @@ def create_export(request, username, id_string, export_type): 'group_delimiter': group_delimiter, 'split_select_multiples': split_select_multiples, 'binary_select_multiples': binary_select_multiples, + 'flatten_repeated_fields': flatten_repeated_fields, + 'export_xlsform': export_xlsform, 'meta': meta.replace(",", "") if meta else None } - + if token: + options['google_token'] = token + try: create_async_export(xform, export_type, query, force_xlsx, options) except Export.ExportTypeError: @@ -374,7 +388,7 @@ def _get_google_token(request, redirect_to_url): def export_list(request, username, id_string, export_type): - if export_type == Export.GDOC_EXPORT: + if export_type == Export.GSHEETS_EXPORT: redirect_url = reverse( export_list, kwargs={ @@ -455,27 +469,8 @@ def export_progress(request, username, id_string, export_type): 'filename': export.filename }) status['filename'] = export.filename - if export.export_type == Export.GDOC_EXPORT and \ - export.export_url is None: - redirect_url = reverse( - export_progress, - kwargs={ - 'username': username, 'id_string': id_string, - 'export_type': export_type}) - token = _get_google_token(request, redirect_url) - if isinstance(token, HttpResponse): - return token - status['url'] = None - try: - url = google_export_xls( - export.full_filepath, xform.title, token, blob=True) - except Exception, e: - status['error'] = True - status['message'] = e.message - else: - export.export_url = url - export.save() - status['url'] = url + if export.export_type == Export.GSHEETS_EXPORT: + status['url'] = export.export_url if export.export_type == Export.EXTERNAL_EXPORT \ and export.export_url is None: status['url'] = url @@ -499,7 +494,7 @@ def export_download(request, username, id_string, export_type, filename): # find the export entry in the db export = get_object_or_404(Export, xform=xform, filename=filename) - if (export_type == Export.GDOC_EXPORT or export_type == Export.EXTERNAL_EXPORT) \ + if (export_type == Export.GSHEETS_EXPORT or export_type == Export.EXTERNAL_EXPORT) \ and export.export_url is not None: return HttpResponseRedirect(export.export_url) @@ -645,55 +640,6 @@ def kml_export(request, username, id_string): return response -def google_xls_export(request, username, id_string): - token = None - if request.user.is_authenticated(): - try: - ts = TokenStorageModel.objects.get(id=request.user) - except TokenStorageModel.DoesNotExist: - pass - else: - token = ts.token - elif request.session.get('access_token'): - token = request.session.get('access_token') - - if token is None: - request.session["google_redirect_url"] = reverse( - google_xls_export, - kwargs={'username': username, 'id_string': id_string}) - return HttpResponseRedirect(redirect_uri) - - owner = get_object_or_404(User, username__iexact=username) - xform = get_object_or_404(XForm, id_string__exact=id_string, user=owner) - if not has_permission(xform, owner, request): - return HttpResponseForbidden(_(u'Not shared.')) - - valid, dd = dd_for_params(id_string, owner, request) - if not valid: - return dd - - 
ddw = XlsWriter()
-    tmp = NamedTemporaryFile(delete=False)
-    ddw.set_file(tmp)
-    ddw.set_data_dictionary(dd)
-    temp_file = ddw.save_workbook_to_file()
-    temp_file.close()
-    url = google_export_xls(tmp.name, xform.title, token, blob=True)
-    os.unlink(tmp.name)
-    audit = {
-        "xform": xform.id_string,
-        "export_type": "google"
-    }
-    audit_log(
-        Actions.EXPORT_CREATED, request.user, owner,
-        _("Created Google Docs export on '%(id_string)s'.") %
-        {
-            'id_string': xform.id_string,
-        }, audit, request)
-
-    return HttpResponseRedirect(url)
-
-
 def data_view(request, username, id_string):
     owner = get_object_or_404(User, username__iexact=username)
     xform = get_object_or_404(XForm, id_string__exact=id_string, user=owner)
diff --git a/onadata/libs/utils/csv_export.py b/onadata/libs/utils/csv_export.py
new file mode 100644
index 000000000..8c4a8a880
--- /dev/null
+++ b/onadata/libs/utils/csv_export.py
@@ -0,0 +1,97 @@
+import csv
+
+from zipfile import ZipFile
+
+from django.core.files.temp import NamedTemporaryFile
+
+from onadata.libs.utils.common_tags import INDEX, PARENT_INDEX
+from onadata.libs.utils.export_builder import ExportBuilder
+
+
+class FlatCsvExportBuilder(ExportBuilder):
+
+    def __init__(self, xform, config):
+        super(FlatCsvExportBuilder, self).__init__(xform, config)
+
+    def export(self, path, data, username, id_string, filter_query):
+        # TODO resolve circular import
+        from onadata.apps.viewer.pandas_mongo_bridge import CSVDataFrameBuilder
+
+        csv_builder = CSVDataFrameBuilder(
+            username, id_string, filter_query, self.group_delimiter,
+            self.split_select_multiples, self.binary_select_multiples)
+        csv_builder.export_to(path)
+
+
+class ZippedCsvExportBuilder(ExportBuilder):
+
+    def __init__(self, xform, config):
+        super(ZippedCsvExportBuilder, self).__init__(xform, config)
+
+    @classmethod
+    def write_row(cls, row, csv_writer, fields):
+        csv_writer.writerow(
+            [ExportBuilder.encode_if_str(row, field) for field in fields])
+
+    def export(self, path, data, *args):
+        csv_defs = {}
+        for section in self.sections:
+            csv_file = NamedTemporaryFile(suffix=".csv")
+            csv_writer = csv.writer(csv_file)
+            csv_defs[section['name']] = {
+                'csv_file': csv_file, 'csv_writer': csv_writer}
+
+        # write headers
+        for section in self.sections:
+            fields = ([element['title'] for element in section['elements']]
+                      + self.EXTRA_FIELDS)
+            csv_defs[section['name']]['csv_writer'].writerow(
+                [f.encode('utf-8') for f in fields])
+
+        indices = {}
+        survey_name = self.survey.name
+        for index, d in enumerate(data, 1):
+            # decode mongo section names
+            joined_export = ExportBuilder.dict_to_joined_export(
+                d, index, indices, survey_name)
+            output = ExportBuilder.decode_mongo_encoded_section_names(
+                joined_export)
+            # attach meta fields (index, parent_index, parent_table)
+            # output has keys for every section
+            if survey_name not in output:
+                output[survey_name] = {}
+            output[survey_name][INDEX] = index
+            output[survey_name][PARENT_INDEX] = -1
+            for section in self.sections:
+                # get data for this section and write to csv
+                section_name = section['name']
+                csv_def = csv_defs[section_name]
+                fields = [
+                    element['xpath'] for element in
+                    section['elements']] + self.EXTRA_FIELDS
+                csv_writer = csv_def['csv_writer']
+                # section name might not exist within the output, e.g. data
+                # was not provided for said repeat - write test to check this
+                row = output.get(section_name, None)
+                if type(row) == dict:
+                    ZippedCsvExportBuilder.write_row(
+                        self.pre_process_row(row, section),
+                        csv_writer, fields)
+                elif type(row) == list:
+                    for child_row in row:
+                        ZippedCsvExportBuilder.write_row(
+                            self.pre_process_row(child_row, section),
+                            csv_writer, fields)
+
+        # write zipfile
+        with ZipFile(path, 'w') as zip_file:
+            for section_name, csv_def in csv_defs.iteritems():
+                csv_file = csv_def['csv_file']
+                csv_file.seek(0)
+                zip_file.write(
+                    csv_file.name,
+                    "_".join(section_name.split("/")) + ".csv")
+
+        # close files when we are done
+        for section_name, csv_def in csv_defs.iteritems():
+            csv_def['csv_file'].close()
diff --git a/onadata/libs/utils/export_builder.py b/onadata/libs/utils/export_builder.py
new file mode 100644
index 000000000..0817fec97
--- /dev/null
+++ b/onadata/libs/utils/export_builder.py
@@ -0,0 +1,347 @@
+import six
+
+from datetime import datetime, date
+from pyxform.question import Question
+from pyxform.section import Section, RepeatingSection
+
+from onadata.apps.viewer.models.parsed_instance import\
+    _is_invalid_for_mongo, _encode_for_mongo, _decode_from_mongo
+from onadata.libs.utils.common_tags import (
+    ID, XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION, BAMBOO_DATASET_ID,
+    DELETEDAT, INDEX, PARENT_INDEX, PARENT_TABLE_NAME,
+    SUBMISSION_TIME, UUID, TAGS, NOTES)
+
+QUESTION_TYPES_TO_EXCLUDE = [
+    u'note',
+]
+# the bind type of select multiples that we use to compare
+MULTIPLE_SELECT_BIND_TYPE = u"select"
+GEOPOINT_BIND_TYPE = u"geopoint"
+
+
+class ExportBuilder(object):
+    """A base class for export builders."""
+
+    IGNORED_COLUMNS = [XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION,
+                       BAMBOO_DATASET_ID, DELETEDAT]
+    # fields we export but are not within the form's structure
+    EXTRA_FIELDS = [ID, UUID, SUBMISSION_TIME, INDEX, PARENT_TABLE_NAME,
+                    PARENT_INDEX, TAGS, NOTES]
+
+    # column group delimiters
+    GROUP_DELIMITER_SLASH = '/'
+    GROUP_DELIMITER_DOT = '.'
+    GROUP_DELIMITERS = [GROUP_DELIMITER_SLASH, GROUP_DELIMITER_DOT]
+    TYPES_TO_CONVERT = ['int', 'decimal', 'date']  # , 'dateTime']
+    CONVERT_FUNCS = {
+        'int': lambda x: int(x),
+        'decimal': lambda x: float(x),
+        'date': lambda x: datetime.strptime(x, "%Y-%m-%d").date(),
+        'dateTime': lambda x: datetime.strptime(x[:19], '%Y-%m-%dT%H:%M:%S')
+    }
+    SHEET_NAME_MAX_CHARS = 31
+    SHEET_TITLE = 'export'
+
+    # Configuration options
+    group_delimiter = '/'
+    split_select_multiples = True
+    binary_select_multiples = False
+
+    def __init__(self, xform, config=None):
+        config = config or {}
+        self.xform = xform
+        self.group_delimiter = config.get(
+            'group_delimiter', self.GROUP_DELIMITER_SLASH)
+        self.split_select_multiples = config.get(
+            'split_select_multiples', True)
+        self.binary_select_multiples = config.get(
+            'binary_select_multiples', False)
+
+    def export(self, path, data, *args):
+        raise NotImplementedError
+
+    def set_survey(self, survey):
+        # TODO resolve circular import
+        from onadata.apps.viewer.models.data_dictionary import\
+            DataDictionary
+
+        def build_sections(
+                current_section, survey_element, sections, select_multiples,
+                gps_fields, encoded_fields, field_delimiter='/'):
+            for child in survey_element.children:
+                current_section_name = current_section['name']
+                # if a section, recurs
+                if isinstance(child, Section):
+                    # if its repeating, build a new section
+                    if isinstance(child, RepeatingSection):
+                        # section_name in recursive call changes
+                        section = {
+                            'name': child.get_abbreviated_xpath(),
+                            'elements': []}
+                        self.sections.append(section)
+                        build_sections(
+                            section, child, sections, select_multiples,
+                            gps_fields, encoded_fields, field_delimiter)
+                    else:
+                        # its a group, recurs using the same section
+                        build_sections(
+                            current_section, child, sections, select_multiples,
+                            gps_fields, encoded_fields, field_delimiter)
+                elif isinstance(child,
Question) and child.bind.get(u"type")\ + not in QUESTION_TYPES_TO_EXCLUDE: + # add to survey_sections + if isinstance(child, Question): + child_xpath = child.get_abbreviated_xpath() + current_section['elements'].append({ + 'title': ExportBuilder.format_field_title( + child.get_abbreviated_xpath(), + field_delimiter), + 'xpath': child_xpath, + 'type': child.bind.get(u"type") + }) + + if _is_invalid_for_mongo(child_xpath): + if current_section_name not in encoded_fields: + encoded_fields[current_section_name] = {} + encoded_fields[current_section_name].update( + {child_xpath: _encode_for_mongo(child_xpath)}) + + # if its a select multiple, make columns out of its choices + if child.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE\ + and self.split_select_multiples: + for c in child.children: + _xpath = c.get_abbreviated_xpath() + _title = ExportBuilder.format_field_title( + _xpath, field_delimiter) + choice = { + 'title': _title, + 'xpath': _xpath, + 'type': 'string' + } + + if choice not in current_section['elements']: + current_section['elements'].append(choice) + _append_xpaths_to_section( + current_section_name, select_multiples, + child.get_abbreviated_xpath(), + [c.get_abbreviated_xpath() + for c in child.children]) + + # split gps fields within this section + if child.bind.get(u"type") == GEOPOINT_BIND_TYPE: + # add columns for geopoint components + xpaths = DataDictionary.get_additional_geopoint_xpaths( + child.get_abbreviated_xpath()) + current_section['elements'].extend( + [ + { + 'title': ExportBuilder.format_field_title( + xpath, field_delimiter), + 'xpath': xpath, + 'type': 'decimal' + } + for xpath in xpaths + ]) + _append_xpaths_to_section( + current_section_name, gps_fields, + child.get_abbreviated_xpath(), xpaths) + + def _append_xpaths_to_section(current_section_name, field_list, xpath, + xpaths): + if current_section_name not in field_list: + field_list[current_section_name] = {} + field_list[ + current_section_name][xpath] = xpaths + + self.survey = survey + self.select_multiples = {} + self.gps_fields = {} + self.encoded_fields = {} + main_section = {'name': survey.name, 'elements': []} + self.sections = [main_section] + build_sections( + main_section, self.survey, self.sections, + self.select_multiples, self.gps_fields, self.encoded_fields, + self.group_delimiter) + + def section_by_name(self, name): + matches = filter(lambda s: s['name'] == name, self.sections) + assert(len(matches) == 1) + return matches[0] + + def get_valid_sheet_name(self, desired_name, existing_names): + # a sheet name has to be <= 31 characters and not a duplicate of an + # existing sheet + # truncate sheet_name to SHEET_NAME_MAX_CHARS + new_sheet_name = desired_name[:self.SHEET_NAME_MAX_CHARS] + + # make sure its unique within the list + i = 1 + generated_name = new_sheet_name + while generated_name in existing_names: + digit_length = len(str(i)) + allowed_name_len = self.SHEET_NAME_MAX_CHARS - digit_length + # make name the required len + if len(generated_name) > allowed_name_len: + generated_name = generated_name[:allowed_name_len] + generated_name = "{0}{1}".format(generated_name, i) + i += 1 + return generated_name + + + @classmethod + def format_field_title(cls, abbreviated_xpath, field_delimiter): + if field_delimiter != '/': + return field_delimiter.join(abbreviated_xpath.split('/')) + return abbreviated_xpath + + @classmethod + def do_split_select_multiples(self, row, select_multiples): + # for each select_multiple, get the associated data and split it + for xpath, choices in 
select_multiples.iteritems(): + # get the data matching this xpath + data = row.get(xpath) + selections = [] + if data: + selections = [ + u'{0}/{1}'.format( + xpath, selection) for selection in data.split()] + if not self.binary_select_multiples: + row.update(dict( + [(choice, choice in selections if selections else None) + for choice in choices])) + else: + YES = 1 + NO = 0 + row.update(dict( + [(choice, YES if choice in selections else NO) + for choice in choices])) + return row + + @classmethod + def split_gps_components(cls, row, gps_fields): + # for each gps_field, get associated data and split it + for xpath, gps_components in gps_fields.iteritems(): + data = row.get(xpath) + if data: + gps_parts = data.split() + if len(gps_parts) > 0: + row.update(zip(gps_components, gps_parts)) + return row + + @classmethod + def decode_mongo_encoded_fields(cls, row, encoded_fields): + for xpath, encoded_xpath in encoded_fields.iteritems(): + if row.get(encoded_xpath): + val = row.pop(encoded_xpath) + row.update({xpath: val}) + return row + + @classmethod + def decode_mongo_encoded_section_names(cls, data): + return dict([(_decode_from_mongo(k), v) for k, v in data.iteritems()]) + + @classmethod + def convert_type(cls, value, data_type): + """ + Convert data to its native type e.g. string '1' to int 1 + @param value: the string value to convert + @param data_type: the native data type to convert to + @return: the converted value + """ + func = ExportBuilder.CONVERT_FUNCS.get(data_type, lambda x: x) + try: + return func(value) + except ValueError: + return value + + def pre_process_row(self, row, section): + """ + Split select multiples, gps and decode . and $ + """ + section_name = section['name'] + + # first decode fields so that subsequent lookups + # have decoded field names + if section_name in self.encoded_fields: + row = ExportBuilder.decode_mongo_encoded_fields( + row, self.encoded_fields[section_name]) + + if self.split_select_multiples and\ + section_name in self.select_multiples: + row = self.do_split_select_multiples( + row, self.select_multiples[section_name]) + + if section_name in self.gps_fields: + row = ExportBuilder.split_gps_components( + row, self.gps_fields[section_name]) + + # convert to native types + for elm in section['elements']: + # only convert if its in our list and its not empty, just to + # optimize + value = row.get(elm['xpath']) + if elm['type'] in ExportBuilder.TYPES_TO_CONVERT\ + and value is not None and value != '': + row[elm['xpath']] = ExportBuilder.convert_type( + value, elm['type']) + + return row + + @classmethod + def dict_to_joined_export(cls, data, index, indices, name): + """ + Converts a dict into one or more tabular datasets + """ + output = {} + # TODO: test for _geolocation and attachment lists + if isinstance(data, dict): + for key, val in data.iteritems(): + if isinstance(val, list) and key not in [NOTES, TAGS]: + output[key] = [] + for child in val: + if key not in indices: + indices[key] = 0 + indices[key] += 1 + child_index = indices[key] + new_output = cls.dict_to_joined_export( + child, child_index, indices, key) + d = {INDEX: child_index, PARENT_INDEX: index, + PARENT_TABLE_NAME: name} + # iterate over keys within new_output and append to + # main output + for out_key, out_val in new_output.iteritems(): + if isinstance(out_val, list): + if out_key not in output: + output[out_key] = [] + output[out_key].extend(out_val) + else: + d.update(out_val) + output[key].append(d) + else: + if name not in output: + output[name] = {} + if key in [TAGS]: 
+ output[name][key] = ",".join(val) + elif key in [NOTES]: + output[name][key] = "\r\n".join( + [v['note'] for v in val]) + else: + output[name][key] = val + + return output + + @classmethod + def encode_if_str(cls, row, key, encode_dates=False): + val = row.get(key) + + if isinstance(val, six.string_types): + return val.encode('utf-8') + + if encode_dates and isinstance(val, datetime): + return val.strftime('%Y-%m-%dT%H:%M:%S%z').encode('utf-8') + + if encode_dates and isinstance(val, date): + return val.strftime('%Y-%m-%d').encode('utf-8') + + return val diff --git a/onadata/libs/utils/export_tools.py b/onadata/libs/utils/export_tools.py index 44dfa4d62..c0e0dca2f 100644 --- a/onadata/libs/utils/export_tools.py +++ b/onadata/libs/utils/export_tools.py @@ -1,12 +1,10 @@ -import csv from datetime import datetime, date import json import os import re import six from urlparse import urlparse -from zipfile import ZipFile - + from bson import json_util from django.conf import settings from django.core.files.base import File @@ -14,52 +12,28 @@ from django.core.files.storage import get_storage_class from django.contrib.auth.models import User from django.shortcuts import render_to_response -from openpyxl.date_time import SharedDate -from openpyxl.workbook import Workbook -from pyxform.question import Question -from pyxform.section import Section, RepeatingSection -from savReaderWriter import SavWriter from json2xlsclient.client import Client from onadata.apps.logger.models import Attachment, Instance, XForm from onadata.apps.main.models.meta_data import MetaData from onadata.apps.viewer.models.export import Export -from onadata.apps.viewer.models.parsed_instance import\ - _is_invalid_for_mongo, _encode_for_mongo, dict_for_mongo,\ - _decode_from_mongo -from onadata.libs.utils.viewer_tools import create_attachments_zipfile,\ - image_urls +from onadata.apps.viewer.models.parsed_instance import dict_for_mongo + +from onadata.libs.utils.viewer_tools import create_attachments_zipfile from onadata.libs.utils.common_tags import ( - ID, XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION, BAMBOO_DATASET_ID, - DELETEDAT, USERFORM_ID, INDEX, PARENT_INDEX, PARENT_TABLE_NAME, - SUBMISSION_TIME, UUID, TAGS, NOTES) + USERFORM_ID, INDEX, PARENT_INDEX, PARENT_TABLE_NAME, TAGS, NOTES) from onadata.libs.exceptions import J2XException - - +from onadata.libs.utils.google_sheets import SheetsExportBuilder +from onadata.libs.utils.csv_export import FlatCsvExportBuilder, ZippedCsvExportBuilder +from onadata.libs.utils.sav_export import ZippedSavExportBuilder +from onadata.libs.utils.xls_export import XlsExportBuilder + # this is Mongo Collection where we will store the parsed submissions xform_instances = settings.MONGO_DB.instances QUESTION_TYPES_TO_EXCLUDE = [ u'note', ] -# the bind type of select multiples that we use to compare -MULTIPLE_SELECT_BIND_TYPE = u"select" -GEOPOINT_BIND_TYPE = u"geopoint" - - -def encode_if_str(row, key, encode_dates=False): - val = row.get(key) - - if isinstance(val, six.string_types): - return val.encode('utf-8') - - if encode_dates and isinstance(val, datetime): - return val.strftime('%Y-%m-%dT%H:%M:%S%z').encode('utf-8') - - if encode_dates and isinstance(val, date): - return val.strftime('%Y-%m-%d').encode('utf-8') - - return val def question_types_to_exclude(_type): @@ -126,566 +100,22 @@ def get_observation_from_dict(self, d): return result -def dict_to_joined_export(data, index, indices, name): - """ - Converts a dict into one or more tabular datasets - """ - output = {} - # TODO: 
test for _geolocation and attachment lists - if isinstance(data, dict): - for key, val in data.iteritems(): - if isinstance(val, list) and key not in [NOTES, TAGS]: - output[key] = [] - for child in val: - if key not in indices: - indices[key] = 0 - indices[key] += 1 - child_index = indices[key] - new_output = dict_to_joined_export( - child, child_index, indices, key) - d = {INDEX: child_index, PARENT_INDEX: index, - PARENT_TABLE_NAME: name} - # iterate over keys within new_output and append to - # main output - for out_key, out_val in new_output.iteritems(): - if isinstance(out_val, list): - if out_key not in output: - output[out_key] = [] - output[out_key].extend(out_val) - else: - d.update(out_val) - output[key].append(d) - else: - if name not in output: - output[name] = {} - if key in [TAGS]: - output[name][key] = ",".join(val) - elif key in [NOTES]: - output[name][key] = "\r\n".join( - [v['note'] for v in val]) - else: - output[name][key] = val - - return output - - -class ExportBuilder(object): - IGNORED_COLUMNS = [XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION, - BAMBOO_DATASET_ID, DELETEDAT] - # fields we export but are not within the form's structure - EXTRA_FIELDS = [ID, UUID, SUBMISSION_TIME, INDEX, PARENT_TABLE_NAME, - PARENT_INDEX, TAGS, NOTES] - SPLIT_SELECT_MULTIPLES = True - BINARY_SELECT_MULTIPLES = False - - # column group delimiters - GROUP_DELIMITER_SLASH = '/' - GROUP_DELIMITER_DOT = '.' - GROUP_DELIMITER = GROUP_DELIMITER_SLASH - GROUP_DELIMITERS = [GROUP_DELIMITER_SLASH, GROUP_DELIMITER_DOT] - TYPES_TO_CONVERT = ['int', 'decimal', 'date'] # , 'dateTime'] - CONVERT_FUNCS = { - 'int': lambda x: int(x), - 'decimal': lambda x: float(x), - 'date': lambda x: ExportBuilder.string_to_date_with_xls_validation(x), - 'dateTime': lambda x: datetime.strptime(x[:19], '%Y-%m-%dT%H:%M:%S') - } - - XLS_SHEET_NAME_MAX_CHARS = 31 - - @classmethod - def string_to_date_with_xls_validation(cls, date_str): - date_obj = datetime.strptime(date_str, '%Y-%m-%d').date() - try: - SharedDate().datetime_to_julian(date_obj) - except ValueError: - return date_str - else: - return date_obj - - @classmethod - def format_field_title(cls, abbreviated_xpath, field_delimiter): - if field_delimiter != '/': - return field_delimiter.join(abbreviated_xpath.split('/')) - return abbreviated_xpath - - def set_survey(self, survey): - # TODO resolve circular import - from onadata.apps.viewer.models.data_dictionary import\ - DataDictionary - - def build_sections( - current_section, survey_element, sections, select_multiples, - gps_fields, encoded_fields, field_delimiter='/'): - for child in survey_element.children: - current_section_name = current_section['name'] - # if a section, recurs - if isinstance(child, Section): - # if its repeating, build a new section - if isinstance(child, RepeatingSection): - # section_name in recursive call changes - section = { - 'name': child.get_abbreviated_xpath(), - 'elements': []} - self.sections.append(section) - build_sections( - section, child, sections, select_multiples, - gps_fields, encoded_fields, field_delimiter) - else: - # its a group, recurs using the same section - build_sections( - current_section, child, sections, select_multiples, - gps_fields, encoded_fields, field_delimiter) - elif isinstance(child, Question) and child.bind.get(u"type")\ - not in QUESTION_TYPES_TO_EXCLUDE: - # add to survey_sections - if isinstance(child, Question): - child_xpath = child.get_abbreviated_xpath() - current_section['elements'].append({ - 'title': ExportBuilder.format_field_title( 
- child.get_abbreviated_xpath(), - field_delimiter), - 'xpath': child_xpath, - 'type': child.bind.get(u"type") - }) - - if _is_invalid_for_mongo(child_xpath): - if current_section_name not in encoded_fields: - encoded_fields[current_section_name] = {} - encoded_fields[current_section_name].update( - {child_xpath: _encode_for_mongo(child_xpath)}) - - # if its a select multiple, make columns out of its choices - if child.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE\ - and self.SPLIT_SELECT_MULTIPLES: - for c in child.children: - _xpath = c.get_abbreviated_xpath() - _title = ExportBuilder.format_field_title( - _xpath, field_delimiter) - choice = { - 'title': _title, - 'xpath': _xpath, - 'type': 'string' - } - - if choice not in current_section['elements']: - current_section['elements'].append(choice) - _append_xpaths_to_section( - current_section_name, select_multiples, - child.get_abbreviated_xpath(), - [c.get_abbreviated_xpath() - for c in child.children]) - - # split gps fields within this section - if child.bind.get(u"type") == GEOPOINT_BIND_TYPE: - # add columns for geopoint components - xpaths = DataDictionary.get_additional_geopoint_xpaths( - child.get_abbreviated_xpath()) - current_section['elements'].extend( - [ - { - 'title': ExportBuilder.format_field_title( - xpath, field_delimiter), - 'xpath': xpath, - 'type': 'decimal' - } - for xpath in xpaths - ]) - _append_xpaths_to_section( - current_section_name, gps_fields, - child.get_abbreviated_xpath(), xpaths) - - def _append_xpaths_to_section(current_section_name, field_list, xpath, - xpaths): - if current_section_name not in field_list: - field_list[current_section_name] = {} - field_list[ - current_section_name][xpath] = xpaths - - self.survey = survey - self.select_multiples = {} - self.gps_fields = {} - self.encoded_fields = {} - main_section = {'name': survey.name, 'elements': []} - self.sections = [main_section] - build_sections( - main_section, self.survey, self.sections, - self.select_multiples, self.gps_fields, self.encoded_fields, - self.GROUP_DELIMITER) - - def section_by_name(self, name): - matches = filter(lambda s: s['name'] == name, self.sections) - assert(len(matches) == 1) - return matches[0] - - @classmethod - def split_select_multiples(cls, row, select_multiples): - # for each select_multiple, get the associated data and split it - for xpath, choices in select_multiples.iteritems(): - # get the data matching this xpath - data = row.get(xpath) - selections = [] - if data: - selections = [ - u'{0}/{1}'.format( - xpath, selection) for selection in data.split()] - if not cls.BINARY_SELECT_MULTIPLES: - row.update(dict( - [(choice, choice in selections if selections else None) - for choice in choices])) - else: - YES = 1 - NO = 0 - row.update(dict( - [(choice, YES if choice in selections else NO) - for choice in choices])) - return row - - @classmethod - def split_gps_components(cls, row, gps_fields): - # for each gps_field, get associated data and split it - for xpath, gps_components in gps_fields.iteritems(): - data = row.get(xpath) - if data: - gps_parts = data.split() - if len(gps_parts) > 0: - row.update(zip(gps_components, gps_parts)) - return row - - @classmethod - def decode_mongo_encoded_fields(cls, row, encoded_fields): - for xpath, encoded_xpath in encoded_fields.iteritems(): - if row.get(encoded_xpath): - val = row.pop(encoded_xpath) - row.update({xpath: val}) - return row - - @classmethod - def decode_mongo_encoded_section_names(cls, data): - return dict([(_decode_from_mongo(k), v) for k, v in 
data.iteritems()]) - - @classmethod - def convert_type(cls, value, data_type): - """ - Convert data to its native type e.g. string '1' to int 1 - @param value: the string value to convert - @param data_type: the native data type to convert to - @return: the converted value - """ - func = ExportBuilder.CONVERT_FUNCS.get(data_type, lambda x: x) - try: - return func(value) - except ValueError: - return value - - def pre_process_row(self, row, section): - """ - Split select multiples, gps and decode . and $ - """ - section_name = section['name'] - - # first decode fields so that subsequent lookups - # have decoded field names - if section_name in self.encoded_fields: - row = ExportBuilder.decode_mongo_encoded_fields( - row, self.encoded_fields[section_name]) - - if self.SPLIT_SELECT_MULTIPLES and\ - section_name in self.select_multiples: - row = ExportBuilder.split_select_multiples( - row, self.select_multiples[section_name]) - - if section_name in self.gps_fields: - row = ExportBuilder.split_gps_components( - row, self.gps_fields[section_name]) - - # convert to native types - for elm in section['elements']: - # only convert if its in our list and its not empty, just to - # optimize - value = row.get(elm['xpath']) - if elm['type'] in ExportBuilder.TYPES_TO_CONVERT\ - and value is not None and value != '': - row[elm['xpath']] = ExportBuilder.convert_type( - value, elm['type']) - - return row - - def to_zipped_csv(self, path, data, *args): - def write_row(row, csv_writer, fields): - csv_writer.writerow( - [encode_if_str(row, field) for field in fields]) - - csv_defs = {} - for section in self.sections: - csv_file = NamedTemporaryFile(suffix=".csv") - csv_writer = csv.writer(csv_file) - csv_defs[section['name']] = { - 'csv_file': csv_file, 'csv_writer': csv_writer} - - # write headers - for section in self.sections: - fields = [element['title'] for element in section['elements']]\ - + self.EXTRA_FIELDS - csv_defs[section['name']]['csv_writer'].writerow( - [f.encode('utf-8') for f in fields]) - - index = 1 - indices = {} - survey_name = self.survey.name - for d in data: - # decode mongo section names - joined_export = dict_to_joined_export(d, index, indices, - survey_name) - output = ExportBuilder.decode_mongo_encoded_section_names( - joined_export) - # attach meta fields (index, parent_index, parent_table) - # output has keys for every section - if survey_name not in output: - output[survey_name] = {} - output[survey_name][INDEX] = index - output[survey_name][PARENT_INDEX] = -1 - for section in self.sections: - # get data for this section and write to csv - section_name = section['name'] - csv_def = csv_defs[section_name] - fields = [ - element['xpath'] for element in - section['elements']] + self.EXTRA_FIELDS - csv_writer = csv_def['csv_writer'] - # section name might not exist within the output, e.g. 
data was - # not provided for said repeat - write test to check this - row = output.get(section_name, None) - if type(row) == dict: - write_row( - self.pre_process_row(row, section), - csv_writer, fields) - elif type(row) == list: - for child_row in row: - write_row( - self.pre_process_row(child_row, section), - csv_writer, fields) - index += 1 - - # write zipfile - with ZipFile(path, 'w') as zip_file: - for section_name, csv_def in csv_defs.iteritems(): - csv_file = csv_def['csv_file'] - csv_file.seek(0) - zip_file.write( - csv_file.name, "_".join(section_name.split("/")) + ".csv") - - # close files when we are done - for section_name, csv_def in csv_defs.iteritems(): - csv_def['csv_file'].close() - - @classmethod - def get_valid_sheet_name(cls, desired_name, existing_names): - # a sheet name has to be <= 31 characters and not a duplicate of an - # existing sheet - # truncate sheet_name to XLSDataFrameBuilder.SHEET_NAME_MAX_CHARS - new_sheet_name = \ - desired_name[:cls.XLS_SHEET_NAME_MAX_CHARS] - - # make sure its unique within the list - i = 1 - generated_name = new_sheet_name - while generated_name in existing_names: - digit_length = len(str(i)) - allowed_name_len = cls.XLS_SHEET_NAME_MAX_CHARS - \ - digit_length - # make name the required len - if len(generated_name) > allowed_name_len: - generated_name = generated_name[:allowed_name_len] - generated_name = "{0}{1}".format(generated_name, i) - i += 1 - return generated_name - - def to_xls_export(self, path, data, *args): - def write_row(data, work_sheet, fields, work_sheet_titles): - # update parent_table with the generated sheet's title - data[PARENT_TABLE_NAME] = work_sheet_titles.get( - data.get(PARENT_TABLE_NAME)) - work_sheet.append([data.get(f) for f in fields]) - - wb = Workbook(optimized_write=True) - work_sheets = {} - # map of section_names to generated_names - work_sheet_titles = {} - for section in self.sections: - section_name = section['name'] - work_sheet_title = ExportBuilder.get_valid_sheet_name( - "_".join(section_name.split("/")), work_sheet_titles.values()) - work_sheet_titles[section_name] = work_sheet_title - work_sheets[section_name] = wb.create_sheet( - title=work_sheet_title) - - # write the headers - for section in self.sections: - section_name = section['name'] - headers = [ - element['title'] for element in - section['elements']] + self.EXTRA_FIELDS - # get the worksheet - ws = work_sheets[section_name] - ws.append(headers) - - index = 1 - indices = {} - survey_name = self.survey.name - for d in data: - joined_export = dict_to_joined_export(d, index, indices, - survey_name) - output = ExportBuilder.decode_mongo_encoded_section_names( - joined_export) - # attach meta fields (index, parent_index, parent_table) - # output has keys for every section - if survey_name not in output: - output[survey_name] = {} - output[survey_name][INDEX] = index - output[survey_name][PARENT_INDEX] = -1 - for section in self.sections: - # get data for this section and write to xls - section_name = section['name'] - fields = [ - element['xpath'] for element in - section['elements']] + self.EXTRA_FIELDS - - ws = work_sheets[section_name] - # section might not exist within the output, e.g. 
data was - # not provided for said repeat - write test to check this - row = output.get(section_name, None) - if type(row) == dict: - write_row( - self.pre_process_row(row, section), - ws, fields, work_sheet_titles) - elif type(row) == list: - for child_row in row: - write_row( - self.pre_process_row(child_row, section), - ws, fields, work_sheet_titles) - index += 1 - - wb.save(filename=path) - - def to_flat_csv_export( - self, path, data, username, id_string, filter_query): - # TODO resolve circular import - from onadata.apps.viewer.pandas_mongo_bridge import\ - CSVDataFrameBuilder - - csv_builder = CSVDataFrameBuilder( - username, id_string, filter_query, self.GROUP_DELIMITER, - self.SPLIT_SELECT_MULTIPLES, self.BINARY_SELECT_MULTIPLES) - csv_builder.export_to(path) - - def to_zipped_sav(self, path, data, *args): - def write_row(row, csv_writer, fields): - sav_writer.writerow( - [encode_if_str(row, field, True) for field in fields]) - - sav_defs = {} - - # write headers - for section in self.sections: - fields = [element['title'] for element in section['elements']]\ - + self.EXTRA_FIELDS - c = 0 - var_labels = {} - var_names = [] - tmp_k = {} - for field in fields: - c += 1 - var_name = 'var%d' % c - var_labels[var_name] = field - var_names.append(var_name) - tmp_k[field] = var_name - - var_types = dict( - [(tmp_k[element['title']], - 0 if element['type'] in ['decimal', 'int'] else 255) - for element in section['elements']] - + [(tmp_k[item], - 0 if item in ['_id', '_index', '_parent_index'] else 255) - for item in self.EXTRA_FIELDS] - ) - sav_file = NamedTemporaryFile(suffix=".sav") - sav_writer = SavWriter(sav_file.name, varNames=var_names, - varTypes=var_types, - varLabels=var_labels, ioUtf8=True) - sav_defs[section['name']] = { - 'sav_file': sav_file, 'sav_writer': sav_writer} - - index = 1 - indices = {} - survey_name = self.survey.name - for d in data: - # decode mongo section names - joined_export = dict_to_joined_export(d, index, indices, - survey_name) - output = ExportBuilder.decode_mongo_encoded_section_names( - joined_export) - # attach meta fields (index, parent_index, parent_table) - # output has keys for every section - if survey_name not in output: - output[survey_name] = {} - output[survey_name][INDEX] = index - output[survey_name][PARENT_INDEX] = -1 - for section in self.sections: - # get data for this section and write to csv - section_name = section['name'] - sav_def = sav_defs[section_name] - fields = [ - element['xpath'] for element in - section['elements']] + self.EXTRA_FIELDS - sav_writer = sav_def['sav_writer'] - row = output.get(section_name, None) - if type(row) == dict: - write_row( - self.pre_process_row(row, section), - sav_writer, fields) - elif type(row) == list: - for child_row in row: - write_row( - self.pre_process_row(child_row, section), - sav_writer, fields) - index += 1 - - for section_name, sav_def in sav_defs.iteritems(): - sav_def['sav_writer'].closeSavFile( - sav_def['sav_writer'].fh, mode='wb') - - # write zipfile - with ZipFile(path, 'w') as zip_file: - for section_name, sav_def in sav_defs.iteritems(): - sav_file = sav_def['sav_file'] - sav_file.seek(0) - zip_file.write( - sav_file.name, "_".join(section_name.split("/")) + ".sav") - - # close files when we are done - for section_name, sav_def in sav_defs.iteritems(): - sav_def['sav_file'].close() - - -def dict_to_flat_export(d, parent_index=0): - pass - - def generate_export(export_type, extension, username, id_string, export_id=None, filter_query=None, group_delimiter='/', - 
split_select_multiples=True,
-                    binary_select_multiples=False):
+                    split_select_multiples=True, binary_select_multiples=False,
+                    google_token=None, flatten_repeated_fields=True,
+                    export_xlsform=True):
     """
     Create appropriate export object given the export type
     """
     # TODO resolve circular import
     from onadata.apps.viewer.models.export import Export
-    export_type_func_map = {
-        Export.XLS_EXPORT: 'to_xls_export',
-        Export.CSV_EXPORT: 'to_flat_csv_export',
-        Export.CSV_ZIP_EXPORT: 'to_zipped_csv',
-        Export.SAV_ZIP_EXPORT: 'to_zipped_sav',
+    export_type_class_map = {
+        Export.XLS_EXPORT: XlsExportBuilder,
+        Export.GSHEETS_EXPORT: SheetsExportBuilder,
+        Export.CSV_EXPORT: FlatCsvExportBuilder,
+        Export.CSV_ZIP_EXPORT: ZippedCsvExportBuilder,
+        Export.SAV_ZIP_EXPORT: ZippedSavExportBuilder,
     }
 
     xform = XForm.objects.get(
@@ -694,24 +124,33 @@ def generate_export(export_type, extension, username, id_string,
     # query mongo for the cursor
     records = query_mongo(username, id_string, filter_query)
 
-    export_builder = ExportBuilder()
-    export_builder.GROUP_DELIMITER = group_delimiter
-    export_builder.SPLIT_SELECT_MULTIPLES = split_select_multiples
-    export_builder.BINARY_SELECT_MULTIPLES = binary_select_multiples
+    spreadsheet_title = "%s_%s" % (id_string,
+                                   datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
+    config = {}
+    config['group_delimiter'] = group_delimiter
+    config['split_select_multiples'] = split_select_multiples
+    config['binary_select_multiples'] = binary_select_multiples
+    if export_type == Export.GSHEETS_EXPORT:
+        config['spreadsheet_title'] = spreadsheet_title
+        config['google_token'] = google_token
+        config['flatten_repeated_fields'] = flatten_repeated_fields
+        config['export_xlsform'] = export_xlsform
+    export_builder = export_type_class_map[export_type](xform, config)
     export_builder.set_survey(xform.data_dictionary().survey)
-
-    temp_file = NamedTemporaryFile(suffix=("." + extension))
-
-    # get the export function by export type
-    func = getattr(export_builder, export_type_func_map[export_type])
-
-    func.__call__(
+
+    if extension:
+        temp_file = NamedTemporaryFile(suffix=("." + extension))
+    else:
+        temp_file = NamedTemporaryFile(delete=False)
+
+    # run the export
+    export_builder.export(
         temp_file.name, records, username, id_string, filter_query)
 
     # generate filename
-    basename = "%s_%s" % (
-        id_string, datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
-    filename = basename + "." + extension
+    filename = spreadsheet_title
+    if extension:
+        filename = filename + "." + extension
 
     # check filename is unique
     while not Export.is_filename_unique(xform, filename):
@@ -743,6 +183,10 @@
     export.filedir = dir_name
-    export.filename = basename
+    export.filename = filename
     export.internal_status = Export.SUCCESSFUL
+    # Get URL of the exported sheet.
+    if export_type == Export.GSHEETS_EXPORT:
+        export.export_url = export_builder.url
+
     # dont persist exports that have a filter
     if filter_query is None:
         export.save()
diff --git a/onadata/libs/utils/google.py b/onadata/libs/utils/google.py
index 52864e971..3eb6075ba 100644
--- a/onadata/libs/utils/google.py
+++ b/onadata/libs/utils/google.py
@@ -3,12 +3,9 @@
 import urllib
 import urllib2
 
-import gdata
+# TODO: gdata is deprecated. For OAuth2 authentication it should be replaced
+# by oauth2client.
 import gdata.gauth
-import gdata.docs
-import gdata.data
-import gdata.docs.client
-import gdata.docs.data
 
 from django.conf import settings
@@ -17,7 +14,8 @@
         client_secret=settings.GOOGLE_CLIENT_SECRET,
         scope=' '.join(
             ['https://docs.google.com/feeds/',
-             'https://spreadsheets.google.com/feeds/']),
+             'https://spreadsheets.google.com/feeds/',
+             'https://www.googleapis.com/auth/drive.file']),
         user_agent='formhub')
 
 redirect_uri = oauth2_token.generate_authorize_url(
@@ -41,20 +39,4 @@ def get_refreshed_token(token):
     token.access_token = tokens['access_token']
     return token
-
-def google_export_xls(filename, title, token, blob=True):
-    if blob:
-        token = gdata.gauth.token_from_blob(token)
-    if token.refresh_token is not None \
-            and token.access_token is not None:
-        oauth2_token.refresh_token = token.refresh_token
-        working_token = get_refreshed_token(oauth2_token)
-        docs_client = gdata.docs.client.DocsClient(
-            source=oauth2_token.user_agent)
-        docs_client = working_token.authorize(docs_client)
-        xls_doc = gdata.docs.data.Resource(
-            type='spreadsheet', title=title)
-        media = gdata.data.MediaSource()
-        media.SetFileHandle(filename, 'application/vnd.ms-excel')
-        xls_doc = docs_client.CreateResource(xls_doc, media=media)
-        return xls_doc.find_html_link()
+
diff --git a/onadata/libs/utils/google_sheets.py b/onadata/libs/utils/google_sheets.py
new file mode 100644
index 000000000..ee8c2806e
--- /dev/null
+++ b/onadata/libs/utils/google_sheets.py
@@ -0,0 +1,308 @@
+"""
+This module contains classes responsible for communicating with the
+Google Data API and common spreadsheet models.
+"""
+import csv
+import gdata.gauth
+import gspread
+import io
+import json
+import xlrd
+
+from django.conf import settings
+from django.core.files.storage import get_storage_class
+from oauth2client.client import SignedJwtAssertionCredentials
+from onadata.koboform.pyxform_utils import convert_csv_to_xls
+from onadata.libs.utils.google import get_refreshed_token
+from onadata.libs.utils.export_builder import ExportBuilder
+from onadata.libs.utils.common_tags import INDEX, PARENT_INDEX, PARENT_TABLE_NAME
+
+
+def update_row(worksheet, index, values):
+    """Adds a row to the worksheet at the specified index and populates it
+    with values. Widens the worksheet if there are more values than columns.
+    :param worksheet: The worksheet to be updated.
+    :param index: Index of the row to be updated.
+    :param values: List of values for the row.
+    """
+    data_width = len(values)
+    if worksheet.col_count < data_width:
+        worksheet.resize(cols=data_width)
+
+    # Fetch the target cells, set their values locally, then push them
+    # back in a single batch update.
+    cell_list = []
+    for i, value in enumerate(values, start=1):
+        cell = worksheet.cell(index, i)
+        cell.value = value
+        cell_list.append(cell)
+
+    worksheet.update_cells(cell_list)
+
+
+def xlrd_format_value(cell):
+    """A helper function to format the value of a cell.
+
+    xlrd stores integers as floats, which means that the cell value
+    42 in Excel is returned as 42.0 in Python. This function tries to guess
+    if the original value was an integer and returns the proper type.
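+
+    For example, a numeric cell holding 42.0 is returned as the int 42,
+    while 42.5 fails the ``int(value) == value`` check and comes back
+    unchanged as a float.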
+ """ + value = cell.value + if cell.ctype == xlrd.XL_CELL_NUMBER and int(value) == value: + value = int(value) + return value + + +class SheetsClient(gspread.client.Client): + """An instance of this class communicates with Google Data API.""" + + AUTH_SCOPE = ' '.join(['https://docs.google.com/feeds/', + 'https://spreadsheets.google.com/feeds/', + 'https://www.googleapis.com/auth/drive.file']) + + DRIVE_API_URL = 'https://www.googleapis.com/drive/v2/files' + + def new(self, title): + headers = {'Content-Type': 'application/json'} + data = { + 'title': title, + 'mimeType': 'application/vnd.google-apps.spreadsheet' + } + r = self.session.request( + 'POST', SheetsClient.DRIVE_API_URL, headers=headers, data=json.dumps(data)) + resp = json.loads(r.read().decode('utf-8')) + sheet_id = resp['id'] + return self.open_by_key(sheet_id) + + + def add_service_account_to_spreadsheet(self, spreadsheet): + url = '%s/%s/permissions' % (SheetsClient.DRIVE_API_URL, spreadsheet.id) + headers = {'Content-Type': 'application/json'} + data = { + 'role': 'writer', + 'type': 'user', + 'value': settings.GOOGLE_CLIENT_EMAIL + } + + self.session.request( + 'POST', url, headers=headers, data=json.dumps(data)) + + @classmethod + def login_with_service_account(cls): + credential = SignedJwtAssertionCredentials(settings.GOOGLE_CLIENT_EMAIL, + settings.GOOGLE_CLIENT_PRIVATE_KEY, scope=SheetsClient.AUTH_SCOPE) + + client = SheetsClient(auth=credential) + client.login() + return client + + @classmethod + def login_with_auth_token(cls, token_string): + # deserialize the token. + token = gdata.gauth.token_from_blob(token_string) + assert token.refresh_token + + # Refresh OAuth token if necessary. + oauth2_token = gdata.gauth.OAuth2Token( + client_id=settings.GOOGLE_CLIENT_ID, + client_secret=settings.GOOGLE_CLIENT_SECRET, + scope=SheetsClient.AUTH_SCOPE, + user_agent='formhub') + oauth2_token.refresh_token = token.refresh_token + refreshed_token = get_refreshed_token(oauth2_token) + + # Create Google Sheet. + client = SheetsClient(auth=refreshed_token) + client.login() + return client + + +class SheetsExportBuilder(ExportBuilder): + client = None + spreadsheet = None + # Worksheets generated by this class. + worksheets = {} + # Map of section_names to generated_names + worksheet_titles = {} + # The URL of the exported sheet. 
+    url = None
+
+    # Configuration options
+    spreadsheet_title = None
+    flatten_repeated_fields = True
+    export_xlsform = True
+    google_token = None
+
+    # Constants
+    SHEETS_BASE_URL = 'https://docs.google.com/spreadsheet/ccc?key=%s&hl'
+    FLATTENED_SHEET_TITLE = 'raw'
+
+    def __init__(self, xform, config):
+        super(SheetsExportBuilder, self).__init__(xform, config)
+        # Initialize the worksheet maps per instance so they are not
+        # shared (and mutated) across exports via the class attributes.
+        self.worksheets = {}
+        self.worksheet_titles = {}
+        self.spreadsheet_title = config['spreadsheet_title']
+        self.google_token = config['google_token']
+        self.flatten_repeated_fields = config['flatten_repeated_fields']
+        self.export_xlsform = config['export_xlsform']
+
+    def export(self, path, data, username, id_string, filter_query):
+        self.client = SheetsClient.login_with_auth_token(self.google_token)
+
+        # Create a new spreadsheet
+        self.spreadsheet = self.client.new(title=self.spreadsheet_title)
+        self.url = self.SHEETS_BASE_URL % self.spreadsheet.id
+
+        # Add the service account as an editor
+        self.client.add_service_account_to_spreadsheet(self.spreadsheet)
+
+        # Perform the actual export
+        if self.flatten_repeated_fields:
+            self.export_flattened(path, data, username, id_string,
+                                  filter_query)
+        else:
+            self.export_tabular(path, data)
+
+        # Write XLSForm data
+        if self.export_xlsform:
+            self._insert_xlsform()
+
+        # Delete the default worksheet if it exists
+        # NOTE: for some reason self.spreadsheet.worksheets() does not contain
+        # the default worksheet (Sheet1). We therefore need to fetch an
+        # updated list here.
+        feed = self.client.get_worksheets_feed(self.spreadsheet)
+        for elem in feed.findall(gspread.ns._ns('entry')):
+            ws = gspread.Worksheet(self.spreadsheet, elem)
+            if ws.title == 'Sheet1':
+                self.client.del_worksheet(ws)
+
+    def export_flattened(self, path, data, username, id_string,
+                         filter_query):
+        # Build a flattened CSV
+        from onadata.apps.viewer.pandas_mongo_bridge import CSVDataFrameBuilder
+        csv_builder = CSVDataFrameBuilder(
+            username, id_string, filter_query, self.group_delimiter,
+            self.split_select_multiples, self.binary_select_multiples)
+        csv_builder.export_to(path)
+
+        # Read the CSV back in and replace 'n/a' entries with blanks
+        rows = []
+        with open(path) as f:
+            reader = csv.reader(f)
+            for row in reader:
+                filtered_rows = [x if x != 'n/a' else '' for x in row]
+                rows.append(filtered_rows)
+
+        # Create a worksheet for the flattened data
+        num_rows = len(rows)
+        if not num_rows:
+            return
+        num_cols = len(rows[0])
+        ws = self.spreadsheet.add_worksheet(
+            title=self.FLATTENED_SHEET_TITLE, rows=num_rows, cols=num_cols)
+
+        # Write the data row by row
+        for index, values in enumerate(rows, 1):
+            update_row(ws, index, values)
+
+    def export_tabular(self, path, data):
+        # Add worksheets for the export.
+        self._create_worksheets()
+
+        # Write the headers
+        self._insert_headers()
+
+        # Write the data
+        self._insert_data(data)
+
+    def _insert_xlsform(self):
+        """Exports the XLSForm sheets (e.g. survey, choices) to the
+        spreadsheet."""
+        assert self.client
+        assert self.spreadsheet
+        assert self.xform
+
+        file_path = self.xform.xls.name
+        default_storage = get_storage_class()()
+
+        if file_path == '' or not default_storage.exists(file_path):
+            # No XLSForm file is attached to this form
+            return
+
+        with default_storage.open(file_path) as xlsform_file:
+            if file_path.endswith('.csv'):
+                xlsform_io = convert_csv_to_xls(xlsform_file.read())
+            else:
+                xlsform_io = io.BytesIO(xlsform_file.read())
+
+        # Open the XLSForm workbook and copy its sheets to Google Sheets.
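+        # NOTE: xlrd supports formatting_info=True only for legacy .xls
+        # workbooks; an .xlsx file here would raise NotImplementedError.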
+        workbook = xlrd.open_workbook(file_contents=xlsform_io.read(),
+                                      formatting_info=True)
+        for wksht_nm in workbook.sheet_names():
+            source_ws = workbook.sheet_by_name(wksht_nm)
+            num_cols = source_ws.ncols
+            num_rows = source_ws.nrows
+            destination_ws = self.spreadsheet.add_worksheet(
+                title=wksht_nm, rows=num_rows, cols=num_cols)
+            for row in xrange(num_rows):
+                update_row(destination_ws, row + 1,
+                           [xlrd_format_value(source_ws.cell(row, col))
+                            for col in xrange(num_cols)])
+
+    def _insert_data(self, data):
+        """Writes the data rows for each section."""
+        indices = {}
+        survey_name = self.survey.name
+        for index, d in enumerate(data, 1):
+            joined_export = ExportBuilder.dict_to_joined_export(
+                d, index, indices, survey_name)
+            output = ExportBuilder.decode_mongo_encoded_section_names(
+                joined_export)
+            # attach meta fields (index, parent_index, parent_table)
+            # output has keys for every section
+            if survey_name not in output:
+                output[survey_name] = {}
+            output[survey_name][INDEX] = index
+            output[survey_name][PARENT_INDEX] = -1
+            for section in self.sections:
+                # get the data for this section and write it to the worksheet
+                section_name = section['name']
+                fields = [
+                    element['xpath'] for element in
+                    section['elements']] + self.EXTRA_FIELDS
+
+                ws = self.worksheets[section_name]
+                # The section might be absent from the output, e.g. when no
+                # data was submitted for a repeat. TODO: add a test for this.
+                row = output.get(section_name, None)
+                if isinstance(row, dict):
+                    SheetsExportBuilder.write_row(
+                        self.pre_process_row(row, section),
+                        ws, fields, self.worksheet_titles)
+                elif isinstance(row, list):
+                    for child_row in row:
+                        SheetsExportBuilder.write_row(
+                            self.pre_process_row(child_row, section),
+                            ws, fields, self.worksheet_titles)
+
+    def _insert_headers(self):
+        """Writes the headers for each section."""
+        for section in self.sections:
+            section_name = section['name']
+            headers = [
+                element['title'] for element in
+                section['elements']] + self.EXTRA_FIELDS
+            # get the worksheet
+            ws = self.worksheets[section_name]
+            update_row(ws, index=1, values=headers)
+
+    def _create_worksheets(self):
+        """Creates one worksheet per section."""
+        for section in self.sections:
+            section_name = section['name']
+            work_sheet_title = self.get_valid_sheet_name(
+                "_".join(section_name.split("/")),
+                self.worksheet_titles.values())
+            self.worksheet_titles[section_name] = work_sheet_title
+            num_cols = len(section['elements']) + len(self.EXTRA_FIELDS)
+            self.worksheets[section_name] = self.spreadsheet.add_worksheet(
+                title=work_sheet_title, rows=1, cols=num_cols)
+
+    @classmethod
+    def write_row(cls, data, worksheet, fields, worksheet_titles):
+        # update parent_table with the generated sheet's title
+        data[PARENT_TABLE_NAME] = worksheet_titles.get(
+            data.get(PARENT_TABLE_NAME))
+        worksheet.append_row([data.get(f) for f in fields])
diff --git a/onadata/libs/utils/sav_export.py b/onadata/libs/utils/sav_export.py
new file mode 100644
index 000000000..d7f7f81d1
--- /dev/null
+++ b/onadata/libs/utils/sav_export.py
@@ -0,0 +1,104 @@
+from django.core.files.temp import NamedTemporaryFile
+from savReaderWriter import SavWriter
+from zipfile import ZipFile
+
+from onadata.libs.utils.common_tags import INDEX, PARENT_INDEX
+from onadata.libs.utils.export_builder import ExportBuilder
+
+
+class ZippedSavExportBuilder(ExportBuilder):
+
+    def __init__(self, xform, config):
+        super(ZippedSavExportBuilder, self).__init__(xform, config)
+
+    @classmethod
+    def write_row(cls, row, sav_writer, fields):
+        sav_writer.writerow(
+            [ExportBuilder.encode_if_str(row, field, True)
+             for field in fields])
+
+    def export(self, path, data, *args):
+        sav_defs = {}
+
+        # write headers
+        for section in self.sections:
+            fields = [element['title'] for element in section['elements']]\
+                + self.EXTRA_FIELDS
+            c = 0
+            var_labels = {}
+            var_names = []
+            tmp_k = {}
+            for field in fields:
+                c += 1
+                var_name = 'var%d' % c
+                var_labels[var_name] = field
+                var_names.append(var_name)
+                tmp_k[field] = var_name
+
+            var_types = dict(
+                [(tmp_k[element['title']],
+                  0 if element['type'] in ['decimal', 'int'] else 255)
+                 for element in section['elements']]
+                + [(tmp_k[item],
+                    0 if item in ['_id', '_index', '_parent_index'] else 255)
+                   for item in self.EXTRA_FIELDS]
+            )
+            sav_file = NamedTemporaryFile(suffix=".sav")
+            sav_writer = SavWriter(sav_file.name, varNames=var_names,
+                                   varTypes=var_types,
+                                   varLabels=var_labels, ioUtf8=True)
+            sav_defs[section['name']] = {
+                'sav_file': sav_file, 'sav_writer': sav_writer}
+
+        index = 1
+        indices = {}
+        survey_name = self.survey.name
+        for d in data:
+            # decode mongo section names
+            joined_export = ExportBuilder.dict_to_joined_export(
+                d, index, indices, survey_name)
+            output = ExportBuilder.decode_mongo_encoded_section_names(
+                joined_export)
+            # attach meta fields (index, parent_index, parent_table)
+            # output has keys for every section
+            if survey_name not in output:
+                output[survey_name] = {}
+            output[survey_name][INDEX] = index
+            output[survey_name][PARENT_INDEX] = -1
+            for section in self.sections:
+                # get the data for this section and write it to the sav file
+                section_name = section['name']
+                sav_def = sav_defs[section_name]
+                fields = [
+                    element['xpath'] for element in
+                    section['elements']] + self.EXTRA_FIELDS
+                sav_writer = sav_def['sav_writer']
+                row = output.get(section_name, None)
+                if isinstance(row, dict):
+                    ZippedSavExportBuilder.write_row(
+                        self.pre_process_row(row, section),
+                        sav_writer, fields)
+                elif isinstance(row, list):
+                    for child_row in row:
+                        ZippedSavExportBuilder.write_row(
+                            self.pre_process_row(child_row, section),
+                            sav_writer, fields)
+            index += 1
+
+        for section_name, sav_def in sav_defs.iteritems():
+            sav_def['sav_writer'].closeSavFile(
+                sav_def['sav_writer'].fh, mode='wb')
+
+        # write zipfile
+        with ZipFile(path, 'w') as zip_file:
+            for section_name, sav_def in sav_defs.iteritems():
+                sav_file = sav_def['sav_file']
+                sav_file.seek(0)
+                zip_file.write(
+                    sav_file.name, "_".join(section_name.split("/")) + ".sav")
+
+        # close files when we are done
+        for section_name, sav_def in sav_defs.iteritems():
+            sav_def['sav_file'].close()
diff --git a/onadata/libs/utils/xls_export.py b/onadata/libs/utils/xls_export.py
new file mode 100644
index 000000000..b78f74688
--- /dev/null
+++ b/onadata/libs/utils/xls_export.py
@@ -0,0 +1,104 @@
+from datetime import datetime
+
+from openpyxl.date_time import SharedDate
+from openpyxl.workbook import Workbook
+
+from onadata.libs.utils.common_tags import INDEX, PARENT_INDEX, PARENT_TABLE_NAME
+from onadata.libs.utils.export_builder import ExportBuilder
+
+
+class XlsExportBuilder(ExportBuilder):
+
+    # Configuration options
+    group_delimiter = '/'
+    split_select_multiples = True
+    binary_select_multiples = False
+
+    CONVERT_FUNCS = {
+        'int': lambda x: int(x),
+        'decimal': lambda x: float(x),
+        'date': lambda x:
+            XlsExportBuilder.string_to_date_with_xls_validation(x),
+        'dateTime': lambda x: datetime.strptime(x[:19], '%Y-%m-%dT%H:%M:%S')
+    }
+
+    def __init__(self, xform, config):
+        super(XlsExportBuilder, self).__init__(xform, config)
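+        # Copy the export options from the config dict assembled by
+        # generate_export() onto this instance.
+        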
self.group_delimiter = config['group_delimiter'] + self.split_select_multiples = config['split_select_multiples'] + self.binary_select_multiples = config['binary_select_multiples'] + + @classmethod + def string_to_date_with_xls_validation(cls, date_str): + date_obj = datetime.strptime(date_str, '%Y-%m-%d').date() + try: + SharedDate().datetime_to_julian(date_obj) + except ValueError: + return date_str + else: + return date_obj + + @classmethod + def write_row(cls, data, work_sheet, fields, work_sheet_titles): + # update parent_table with the generated sheet's title + data[PARENT_TABLE_NAME] = work_sheet_titles.get( + data.get(PARENT_TABLE_NAME)) + work_sheet.append([data.get(f) for f in fields]) + + def export(self, path, data, *args): + wb = Workbook(optimized_write=True) + work_sheets = {} + # map of section_names to generated_names + work_sheet_titles = {} + for section in self.sections: + section_name = section['name'] + work_sheet_title = self.get_valid_sheet_name( + "_".join(section_name.split("/")), work_sheet_titles.values()) + work_sheet_titles[section_name] = work_sheet_title + work_sheets[section_name] = wb.create_sheet( + title=work_sheet_title) + + # write the headers + for section in self.sections: + section_name = section['name'] + headers = [ + element['title'] for element in + section['elements']] + self.EXTRA_FIELDS + # get the worksheet + ws = work_sheets[section_name] + ws.append(headers) + + indices = {} + survey_name = self.survey.name + for index, d in enumerate(data, 1): + joined_export = ExportBuilder.dict_to_joined_export( + d, index, indices, survey_name) + output = ExportBuilder.decode_mongo_encoded_section_names( + joined_export) + # attach meta fields (index, parent_index, parent_table) + # output has keys for every section + if survey_name not in output: + output[survey_name] = {} + output[survey_name][INDEX] = index + output[survey_name][PARENT_INDEX] = -1 + for section in self.sections: + # get data for this section and write to xls + section_name = section['name'] + fields = [ + element['xpath'] for element in + section['elements']] + self.EXTRA_FIELDS + + ws = work_sheets[section_name] + # section might not exist within the output, e.g. 
data was
+                # not provided for said repeat. TODO: add a test for this
+                # case.
+                row = output.get(section_name, None)
+                if isinstance(row, dict):
+                    XlsExportBuilder.write_row(
+                        self.pre_process_row(row, section),
+                        ws, fields, work_sheet_titles)
+                elif isinstance(row, list):
+                    for child_row in row:
+                        XlsExportBuilder.write_row(
+                            self.pre_process_row(child_row, section),
+                            ws, fields, work_sheet_titles)
+
+        wb.save(filename=path)
diff --git a/onadata/settings/common.py b/onadata/settings/common.py
index d45b30cd3..528a50506 100644
--- a/onadata/settings/common.py
+++ b/onadata/settings/common.py
@@ -224,6 +224,7 @@
     'onadata.apps.stats',
     'onadata.apps.sms_support',
     'onadata.libs',
+    'onadata.apps.sheets_sync',
 )
 
 OAUTH2_PROVIDER = {
@@ -397,9 +398,23 @@ def configure_logging(logger, **kwargs):
     'PASSWORD': ''
 }
 
-GOOGLE_STEP2_URI = 'http://ona.io/gwelcome'
-GOOGLE_CLIENT_ID = '617113120802.onadata.apps.googleusercontent.com'
-GOOGLE_CLIENT_SECRET = '9reM29qpGFPyI8TBuB54Z4fk'
+# NOTE: development OAuth credentials; override these in deployment settings.
+GOOGLE_STEP2_URI = 'http://localhost:8001/gwelcome'
+GOOGLE_CLIENT_ID = '896862299299-mv5q1t7qmljc3m4f7l74n0c1nf7pdcqn.apps.googleusercontent.com'
+GOOGLE_CLIENT_SECRET = 'rRYajhQEuQszfx8jW0nfehgT'
+GOOGLE_CLIENT_EMAIL = os.environ.get(
+    'GOOGLE_CLIENT_EMAIL',
+    '896862299299-c651sc4ne7t9v23bk70s7m70b37h9e3k@developer.gserviceaccount.com')
+GOOGLE_CLIENT_PRIVATE_KEY_PATH = os.environ.get(
+    'GOOGLE_CLIENT_PRIVATE_KEY_PATH',
+    os.path.join(PROJECT_ROOT, 'settings/google-private-key.p12'))
+
+def _get_google_client_private_key():
+    try:
+        with open(GOOGLE_CLIENT_PRIVATE_KEY_PATH) as f:
+            return f.read()
+    except EnvironmentError as e:
+        print 'Could not open private key file: %s' % e
+
+GOOGLE_CLIENT_PRIVATE_KEY = _get_google_client_private_key()
+
 THUMB_CONF = {
     'large': {'size': 1280, 'suffix': '-large'},
diff --git a/requirements/base.pip b/requirements/base.pip
index b94342b43..deab49b42 100644
--- a/requirements/base.pip
+++ b/requirements/base.pip
@@ -8,6 +8,7 @@ django-guardian==1.2.4
 django-registration-redux==1.1
 django-templated-email==0.4.9
 gdata==2.0.18
+gspread==0.2.5
 httplib2==0.9
 mock==1.0.1
 httmock==1.2.2
@@ -60,6 +61,7 @@ django-taggit==0.12.1
 
 # oath2 support
 django-oauth-toolkit==0.7.2
+oauth2client==1.4.12
 
 # spss
 https://bitbucket.org/fomcl/savreaderwriter/downloads/savReaderWriter-3.3.0.zip#egg=savreaderwriter