diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_children.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_children.csv
new file mode 100644
index 000000000..88bc23621
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_children.csv
@@ -0,0 +1,3 @@
+children/childs_name,children/childs_age,_id,_uuid,_submission_time,_index,_parent_table_name,_parent_index,_tags,_notes
+Tom,12,,,,1,tutorial_w_repeats,1,,
+Dick,5,,,,2,tutorial_w_repeats,1,,
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_choices.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_choices.csv
new file mode 100644
index 000000000..651494261
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_choices.csv
@@ -0,0 +1,8 @@
+list name,name,label
+yes_no,0,no
+yes_no,1,yes
+,,
+browsers,firefox,Mozilla Firefox
+browsers,chrome,Google Chrome
+browsers,ie,Internet Explorer
+browsers,safari,Safari
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_flattened_raw.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_flattened_raw.csv
new file mode 100644
index 000000000..47d193a1b
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_flattened_raw.csv
@@ -0,0 +1,2 @@
+name,age,picture,has_children,children[1]/childs_age,children[1]/childs_name,children[2]/childs_age,children[2]/childs_name,gps,_gps_latitude,_gps_longitude,_gps_altitude,_gps_precision,web_browsers,meta/instanceID,_uuid,_submission_time,_tags,_notes
+Bob,25,,1,12,Tom,5,Dick,-1.2625621 36.7921711 0.0 20.0,-1.2625621,36.7921711,0.0,20.0,,uuid:b31c6ac2-b8ca-4180-914f-c844fa10ed3b,b31c6ac2-b8ca-4180-914f-c844fa10ed3b,2013-02-18T15:54:01,,
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_survey.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_survey.csv
new file mode 100644
index 000000000..ac02574dc
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_survey.csv
@@ -0,0 +1,11 @@
+type,name,label
+text,name,1. What is your name?
+integer,age,2. How old are you?
+image,picture,3. May I take your picture?
+select one from yes_no,has_children,4. Do you have any children?
+begin repeat,children,5. Children
+text,childs_name,5.1 Childs name?
+integer,childs_age,5.2 Childs age?
+end repeat,,
+geopoint,gps,5. Record your GPS coordinates.
+select all that apply from browsers,web_browsers,6. What web browsers do you use?
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/expected_tutorial_w_repeats.csv b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_tutorial_w_repeats.csv
new file mode 100644
index 000000000..45674287a
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/expected_tutorial_w_repeats.csv
@@ -0,0 +1,2 @@
+name,age,picture,has_children,gps,_gps_latitude,_gps_longitude,_gps_altitude,_gps_precision,web_browsers,web_browsers/firefox,web_browsers/chrome,web_browsers/ie,web_browsers/safari,meta/instanceID,_id,_uuid,_submission_time,_index,_parent_table_name,_parent_index,_tags,_notes
+Bob,25,,1,-1.2625621 36.7921711 0.0 20.0,-1.2625621,36.7921711,0.0,20.0,,,,,,uuid:b31c6ac2-b8ca-4180-914f-c844fa10ed3b,###EXPORT_ID###,b31c6ac2-b8ca-4180-914f-c844fa10ed3b,2013-02-18T15:54:01,1,,-1,,
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xls b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xls
new file mode 100755
index 000000000..71aea101d
Binary files /dev/null and b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xls differ
diff --git a/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xml b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xml
new file mode 100644
index 000000000..a68cbc554
--- /dev/null
+++ b/onadata/apps/main/tests/fixtures/google_sheets_export/tutorial_w_repeats.xml
@@ -0,0 +1 @@
+<tutorial_w_repeats id="tutorial_w_repeats"><name>Bob</name><age>25</age><picture/><has_children>1</has_children><children><childs_name>Tom</childs_name><childs_age>12</childs_age></children><children><childs_name>Dick</childs_name><childs_age>5</childs_age></children><gps>-1.2625621 36.7921711 0.0 20.0</gps><web_browsers/><meta><instanceID>uuid:b31c6ac2-b8ca-4180-914f-c844fa10ed3b</instanceID></meta></tutorial_w_repeats>
diff --git a/onadata/apps/main/tests/test_google_docs_export.py b/onadata/apps/main/tests/test_google_docs_export.py
deleted file mode 100644
index 16abdf82d..000000000
--- a/onadata/apps/main/tests/test_google_docs_export.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from django.core.urlresolvers import reverse
-from mock import Mock, patch
-
-from onadata.apps.main.google_export import refresh_access_token
-from onadata.apps.main.models import TokenStorageModel
-from onadata.apps.viewer.views import google_xls_export
-from onadata.libs.utils.google import oauth2_token as token
-from test_base import TestBase
-
-
-class TestGoogleDocsExport(TestBase):
-
- def setUp(self):
- self.token = token
- self.refresh_token = '1/ISGBd-OBWr-RbXN2Fq879Xht1inmg_n4sJ_Wd4CoQNk'
- self.token.refresh_token = self.refresh_token
- self._create_user_and_login()
-
- @patch('gdata.docs.client.DocsClient')
- @patch('urllib2.urlopen')
- def test_google_docs_export(self, mock_urlopen, mock_docs_client):
- mock_urlopen.return_value.read.return_value = '{"access_token": "ABC"}'
- mock_xls_doc = Mock()
- mock_xls_doc.find_html_link.return_value = 'link'
- mock_docs_client = Mock()
- mock_docs_client.CreateResource.return_value = mock_xls_doc
- mock_docs_client.return_value = mock_docs_client
-
- self._publish_transportation_form()
- self._make_submissions()
-
- initial_token_count = TokenStorageModel.objects.all().count()
- self.token = refresh_access_token(self.token, self.user)
-
- self.assertIsNotNone(self.token.access_token)
- self.assertEqual(
- TokenStorageModel.objects.all().count(), initial_token_count + 1)
-
- response = self.client.get(reverse(google_xls_export, kwargs={
- 'username': self.user.username,
- 'id_string': self.xform.id_string
- }))
- self.assertEqual(response.status_code, 302)
- # share the data, log out, and check the export
- self._share_form_data()
- self._logout()
- response = self.client.get(reverse(google_xls_export, kwargs={
- 'username': self.user.username,
- 'id_string': self.xform.id_string
- }))
- self.assertEqual(response.status_code, 302)
diff --git a/onadata/apps/main/tests/test_google_sheets_export.py b/onadata/apps/main/tests/test_google_sheets_export.py
new file mode 100644
index 000000000..8ea750ad8
--- /dev/null
+++ b/onadata/apps/main/tests/test_google_sheets_export.py
@@ -0,0 +1,156 @@
+import csv
+import os
+
+import gdata.gauth
+
+from django.core.files.storage import get_storage_class
+from django.core.files.temp import NamedTemporaryFile
+from django.utils.dateparse import parse_datetime
+from mock import Mock, patch
+
+from onadata.apps.viewer.models.export import Export
+from onadata.libs.utils.export_tools import generate_export
+from onadata.libs.utils.google import oauth2_token
+from onadata.libs.utils.google_sheets import SheetsClient
+from test_base import TestBase
+
+
+class MockCell(object):
+ def __init__(self, row, col, value):
+ self.row = row
+ self.col = col
+ self.value = value
+
+
+class TestExport(TestBase):
+
+ def setUp(self):
+ # Prepare a fake token.
+ self.token = oauth2_token
+ self.token.refresh_token = 'foo'
+ self.token.access_token = 'bar'
+ self.token_blob = gdata.gauth.token_to_blob(self.token)
+
+ # Files that contain the expected spreadsheet data.
+ self.fixture_dir = os.path.join(
+ self.this_directory, 'fixtures', 'google_sheets_export')
+
+ # Create a test user and login.
+ self._create_user_and_login()
+
+ # Create a test submission.
+ path = os.path.join(self.fixture_dir, 'tutorial_w_repeats.xls')
+ self._publish_xls_file_and_set_xform(path)
+ path = os.path.join(self.fixture_dir, 'tutorial_w_repeats.xml')
+ self._submission_time = parse_datetime('2013-02-18 15:54:01Z')
+ self._make_submission(
+ path, forced_submission_time=self._submission_time)
+
+
+ def _mock_worksheet(self, csv_writer):
+ """Creates a mock worksheet object with append_row and insert_row
+ methods writing to csv_writer."""
+ worksheet = Mock()
+ worksheet.append_row.side_effect = \
+ lambda values: csv_writer.writerow(values)
+ def create_cell(r, c):
+ return MockCell(r, c, None)
+ worksheet.cell.side_effect = create_cell
+ worksheet.update_cells.side_effect = \
+ lambda cells: csv_writer.writerow([cell.value for cell in cells])
+ worksheet.insert_row.side_effect = \
+ lambda values, index: csv_writer.writerow(values)
+ return worksheet
+
+ def _mock_spreadsheet(self, csv_writers):
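+        """Creates a mock spreadsheet whose add_worksheet method returns a
+        mock worksheet per csv_writer, in order."""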
+ spreadsheet = Mock()
+ spreadsheet.add_worksheet.side_effect = \
+ [self._mock_worksheet(writer) for writer in csv_writers]
+ return spreadsheet
+
+ def _setup_result_files(self, expected_file_names):
+ expected_files = [open(os.path.join(self.fixture_dir, f))
+ for f in expected_file_names]
+ result_files = [NamedTemporaryFile() for f in expected_file_names]
+ csv_writers = [csv.writer(f, lineterminator='\n') for f in result_files]
+ return expected_files, result_files, csv_writers
+
+ def assertStorageExists(self, export):
+ storage = get_storage_class()()
+ self.assertTrue(storage.exists(export.filepath))
+ _, ext = os.path.splitext(export.filename)
+ self.assertEqual(ext, '.gsheets')
+
+ def assertEqualExportFiles(self, expected_files, result_files, export):
+ for result, expected in zip(result_files, expected_files):
+ result.flush()
+ result.seek(0)
+ expected_content = expected.read()
+ # Fill in the actual export id (varies based on test order)
+ expected_content = expected_content.replace('###EXPORT_ID###',
+ str(export.id))
+ result_content = result.read()
+            self.assertEqual(result_content, expected_content)
+
+
+ @patch.object(SheetsClient, 'new')
+ @patch.object(SheetsClient, 'add_service_account_to_spreadsheet')
+ @patch.object(SheetsClient, 'get_worksheets_feed')
+ @patch('urllib2.urlopen')
+ def test_gsheets_export_output(self,
+ mock_urlopen,
+ mock_get_worksheets,
+ mock_account_add_service_account,
+ mock_new):
+ expected_files, result_files, csv_writers = self._setup_result_files(
+ ['expected_tutorial_w_repeats.csv',
+ 'expected_children.csv',
+ 'expected_survey.csv',
+ 'expected_choices.csv'])
+ mock_urlopen.return_value.read.return_value = '{"access_token": "baz"}'
+ mock_new.return_value = self._mock_spreadsheet(csv_writers)
+
+ # Test Google Sheets export.
+ export = generate_export(export_type=Export.GSHEETS_EXPORT,
+ extension='gsheets',
+ username=self.user.username,
+ id_string='tutorial_w_repeats',
+ split_select_multiples=True,
+ binary_select_multiples=False,
+ google_token=self.token_blob,
+ flatten_repeated_fields=False,
+ export_xlsform=True)
+ self.assertStorageExists(export)
+ self.assertEqualExportFiles(expected_files, result_files, export)
+
+
+ @patch.object(SheetsClient, 'new')
+ @patch.object(SheetsClient, 'add_service_account_to_spreadsheet')
+ @patch.object(SheetsClient, 'get_worksheets_feed')
+ @patch('urllib2.urlopen')
+ def test_gsheets_export_flattened_output(self,
+ mock_urlopen,
+ mock_get_worksheets,
+ mock_account_add_service_account,
+ mock_new):
+ expected_files, result_files, csv_writers = self._setup_result_files(
+ ['expected_flattened_raw.csv'])
+ mock_urlopen.return_value.read.return_value = '{"access_token": "baz"}'
+ mock_spreadsheet = self._mock_spreadsheet(csv_writers)
+ mock_new.return_value = mock_spreadsheet
+
+ # Test Google Sheets export.
+ export = generate_export(export_type=Export.GSHEETS_EXPORT,
+ extension='gsheets',
+ username=self.user.username,
+ id_string='tutorial_w_repeats',
+ split_select_multiples=False,
+ binary_select_multiples=False,
+ google_token=self.token_blob,
+ flatten_repeated_fields=True,
+ export_xlsform=False)
+ self.assertStorageExists(export)
+ self.assertEqualExportFiles(expected_files, result_files, export)
+
diff --git a/onadata/apps/main/urls.py b/onadata/apps/main/urls.py
index 64a0cf779..df161f113 100644
--- a/onadata/apps/main/urls.py
+++ b/onadata/apps/main/urls.py
@@ -152,8 +152,6 @@
'onadata.apps.viewer.views.kml_export'),
url(r"^(?P\w+)/forms/(?P[^/]+)/data\.zip",
'onadata.apps.viewer.views.zip_export'),
- url(r"^(?P\w+)/forms/(?P[^/]+)/gdocs$",
- 'onadata.apps.viewer.views.google_xls_export'),
url(r"^(?P\w+)/forms/(?P[^/]+)/map_embed",
'onadata.apps.viewer.views.map_embed_view'),
url(r"^(?P\w+)/forms/(?P[^/]+)/map",
diff --git a/onadata/apps/viewer/models/export.py b/onadata/apps/viewer/models/export.py
index 660910dac..1516edd49 100644
--- a/onadata/apps/viewer/models/export.py
+++ b/onadata/apps/viewer/models/export.py
@@ -28,7 +28,7 @@ def __str__(self):
CSV_EXPORT = 'csv'
KML_EXPORT = 'kml'
ZIP_EXPORT = 'zip'
- GDOC_EXPORT = 'gdoc'
+ GSHEETS_EXPORT = 'gsheets'
CSV_ZIP_EXPORT = 'csv_zip'
SAV_ZIP_EXPORT = 'sav_zip'
SAV_EXPORT = 'sav'
@@ -48,7 +48,7 @@ def __str__(self):
EXPORT_TYPES = [
(XLS_EXPORT, 'Excel'),
(CSV_EXPORT, 'CSV'),
- (GDOC_EXPORT, 'GDOC'),
+ (GSHEETS_EXPORT, 'Google Sheets'),
(ZIP_EXPORT, 'ZIP'),
(KML_EXPORT, 'kml'),
(CSV_ZIP_EXPORT, 'CSV ZIP'),
diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py
index d466b10ab..d632f44b6 100644
--- a/onadata/apps/viewer/tasks.py
+++ b/onadata/apps/viewer/tasks.py
@@ -30,9 +30,8 @@ def _create_export(xform, export_type):
'export_id': export.id,
'query': query,
}
- if export_type in [Export.XLS_EXPORT, Export.GDOC_EXPORT,
- Export.CSV_EXPORT, Export.CSV_ZIP_EXPORT,
- Export.SAV_ZIP_EXPORT]:
+ if export_type in [Export.XLS_EXPORT, Export.CSV_EXPORT,
+ Export.CSV_ZIP_EXPORT, Export.SAV_ZIP_EXPORT]:
if options and "group_delimiter" in options:
arguments["group_delimiter"] = options["group_delimiter"]
if options and "split_select_multiples" in options:
@@ -43,7 +42,7 @@ def _create_export(xform, export_type):
options["binary_select_multiples"]
# start async export
- if export_type in [Export.XLS_EXPORT, Export.GDOC_EXPORT]:
+ if export_type == Export.XLS_EXPORT:
result = create_xls_export.apply_async((), arguments, countdown=10)
elif export_type == Export.CSV_EXPORT:
result = create_csv_export.apply_async(
@@ -56,6 +55,25 @@
(), arguments, countdown=10)
else:
raise Export.ExportTypeError
+ elif export_type == Export.GSHEETS_EXPORT:
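+        # Google Sheets exports take the same spreadsheet options as the
+        # types above, plus the OAuth token and sheet layout flags below.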
+ if options and "group_delimiter" in options:
+ arguments["group_delimiter"] = options["group_delimiter"]
+ if options and "split_select_multiples" in options:
+ arguments["split_select_multiples"] =\
+ options["split_select_multiples"]
+ if options and "binary_select_multiples" in options:
+ arguments["binary_select_multiples"] =\
+ options["binary_select_multiples"]
+ if options and "google_token" in options:
+ arguments["google_token"] = options["google_token"]
+ if options and "flatten_repeated_fields" in options:
+ arguments["flatten_repeated_fields"] =\
+ options["flatten_repeated_fields"]
+ if options and "export_xlsform" in options:
+ arguments["export_xlsform"] = options["export_xlsform"]
+ result = create_gsheets_export.apply_async((), arguments, countdown=10)
elif export_type == Export.ZIP_EXPORT:
# start async export
result = create_zip_export.apply_async(
@@ -125,6 +141,44 @@ def create_xls_export(username, id_string, export_id, query=None,
else:
return gen_export.id
 
 
+@task()
+def create_gsheets_export(
+ username, id_string, export_id, query=None, group_delimiter='/',
+ split_select_multiples=True, binary_select_multiples=False,
+ google_token=None, flatten_repeated_fields=True, export_xlsform=True):
+ # we re-query the db instead of passing model objects according to
+ # http://docs.celeryproject.org/en/latest/userguide/tasks.html#state
+ try:
+ export = Export.objects.get(id=export_id)
+ except Export.DoesNotExist:
+ # no export for this ID return None.
+ return None
+
+    # though export is not available when the form has 0 submissions, we
+    # catch this since it potentially stops celery
+ try:
+ gen_export = generate_export(
+ Export.GSHEETS_EXPORT, None, username, id_string, export_id, query,
+ group_delimiter, split_select_multiples, binary_select_multiples,
+ google_token, flatten_repeated_fields, export_xlsform)
+ except (Exception, NoRecordsFoundError) as e:
+ export.internal_status = Export.FAILED
+ export.save()
+ # mail admins
+ details = {
+ 'export_id': export_id,
+ 'username': username,
+ 'id_string': id_string
+ }
+ report_exception("Google Sheets Export Exception: Export ID - "
+ "%(export_id)s, /%(username)s/%(id_string)s"
+ % details, e, sys.exc_info())
+        # Raise for now to let celery know we failed
+        # - doesn't seem to break celery
+ raise
+ else:
+ return gen_export.id
+
@task()
def create_csv_export(username, id_string, export_id, query=None,
diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py
index fd64b150c..217b83f6e 100644
--- a/onadata/apps/viewer/views.py
+++ b/onadata/apps/viewer/views.py
@@ -34,7 +34,7 @@
kml_export_data,
newset_export_for)
from onadata.libs.utils.image_tools import image_url
-from onadata.libs.utils.google import google_export_xls, redirect_uri
+from onadata.libs.utils.google import redirect_uri
from onadata.libs.utils.log import audit_log, Actions
from onadata.libs.utils.logger_tools import response_with_mimetype_and_name,\
disposition_ext_and_date
@@ -298,7 +298,20 @@
xform = get_object_or_404(XForm, id_string__exact=id_string, user=owner)
if not has_permission(xform, owner, request):
return HttpResponseForbidden(_(u'Not shared.'))
-
+
+ token = None
+ if export_type == Export.GSHEETS_EXPORT:
+ redirect_url = reverse(
+ create_export,
+ kwargs={
+ 'username': username, 'id_string': id_string,
+ 'export_type': export_type})
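+        # _get_google_token returns an HttpResponse (a redirect to the
+        # authorization page) when the user still needs to grant access.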
+ token = _get_google_token(request, redirect_url)
+ if isinstance(token, HttpResponse):
+ return token
+
if export_type == Export.EXTERNAL_EXPORT:
# check for template before trying to generate a report
if not MetaData.external_export(xform=xform):
@@ -313,11 +324,10 @@ def create_export(request, username, id_string, export_type):
return HttpResponseBadRequest(
_("%s is not a valid delimiter" % group_delimiter))
- # default is True, so when dont_.. is yes
- # split_select_multiples becomes False
- split_select_multiples = request.POST.get(
- "options[dont_split_select_multiples]", "no") == "no"
-
+ split_select_multiples = "options[split_select_multiples]" in request.POST
+ flatten_repeated_fields = "options[flatten_repeated_fields]" in request.POST
+ export_xlsform = "options[export_xlsform]" in request.POST
+
binary_select_multiples = getattr(settings, 'BINARY_SELECT_MULTIPLES',
False)
# external export option
@@ -326,9 +336,13 @@ def create_export(request, username, id_string, export_type):
'group_delimiter': group_delimiter,
'split_select_multiples': split_select_multiples,
'binary_select_multiples': binary_select_multiples,
+ 'flatten_repeated_fields': flatten_repeated_fields,
+ 'export_xlsform': export_xlsform,
'meta': meta.replace(",", "") if meta else None
}
-
+ if token:
+ options['google_token'] = token
+
try:
create_async_export(xform, export_type, query, force_xlsx, options)
except Export.ExportTypeError:
@@ -374,7 +388,7 @@ def _get_google_token(request, redirect_to_url):
def export_list(request, username, id_string, export_type):
- if export_type == Export.GDOC_EXPORT:
+ if export_type == Export.GSHEETS_EXPORT:
redirect_url = reverse(
export_list,
kwargs={
@@ -455,27 +469,8 @@ def export_progress(request, username, id_string, export_type):
'filename': export.filename
})
status['filename'] = export.filename
- if export.export_type == Export.GDOC_EXPORT and \
- export.export_url is None:
- redirect_url = reverse(
- export_progress,
- kwargs={
- 'username': username, 'id_string': id_string,
- 'export_type': export_type})
- token = _get_google_token(request, redirect_url)
- if isinstance(token, HttpResponse):
- return token
- status['url'] = None
- try:
- url = google_export_xls(
- export.full_filepath, xform.title, token, blob=True)
- except Exception, e:
- status['error'] = True
- status['message'] = e.message
- else:
- export.export_url = url
- export.save()
- status['url'] = url
+ if export.export_type == Export.GSHEETS_EXPORT:
+ status['url'] = export.export_url
if export.export_type == Export.EXTERNAL_EXPORT \
and export.export_url is None:
status['url'] = url
@@ -499,7 +494,7 @@ def export_download(request, username, id_string, export_type, filename):
# find the export entry in the db
export = get_object_or_404(Export, xform=xform, filename=filename)
- if (export_type == Export.GDOC_EXPORT or export_type == Export.EXTERNAL_EXPORT) \
+ if (export_type == Export.GSHEETS_EXPORT or export_type == Export.EXTERNAL_EXPORT) \
and export.export_url is not None:
return HttpResponseRedirect(export.export_url)
@@ -645,55 +640,6 @@ def kml_export(request, username, id_string):
return response
-def google_xls_export(request, username, id_string):
- token = None
- if request.user.is_authenticated():
- try:
- ts = TokenStorageModel.objects.get(id=request.user)
- except TokenStorageModel.DoesNotExist:
- pass
- else:
- token = ts.token
- elif request.session.get('access_token'):
- token = request.session.get('access_token')
-
- if token is None:
- request.session["google_redirect_url"] = reverse(
- google_xls_export,
- kwargs={'username': username, 'id_string': id_string})
- return HttpResponseRedirect(redirect_uri)
-
- owner = get_object_or_404(User, username__iexact=username)
- xform = get_object_or_404(XForm, id_string__exact=id_string, user=owner)
- if not has_permission(xform, owner, request):
- return HttpResponseForbidden(_(u'Not shared.'))
-
- valid, dd = dd_for_params(id_string, owner, request)
- if not valid:
- return dd
-
- ddw = XlsWriter()
- tmp = NamedTemporaryFile(delete=False)
- ddw.set_file(tmp)
- ddw.set_data_dictionary(dd)
- temp_file = ddw.save_workbook_to_file()
- temp_file.close()
- url = google_export_xls(tmp.name, xform.title, token, blob=True)
- os.unlink(tmp.name)
- audit = {
- "xform": xform.id_string,
- "export_type": "google"
- }
- audit_log(
- Actions.EXPORT_CREATED, request.user, owner,
- _("Created Google Docs export on '%(id_string)s'.") %
- {
- 'id_string': xform.id_string,
- }, audit, request)
-
- return HttpResponseRedirect(url)
-
-
def data_view(request, username, id_string):
owner = get_object_or_404(User, username__iexact=username)
xform = get_object_or_404(XForm, id_string__exact=id_string, user=owner)
diff --git a/onadata/libs/utils/csv_export.py b/onadata/libs/utils/csv_export.py
new file mode 100644
index 000000000..8c4a8a880
--- /dev/null
+++ b/onadata/libs/utils/csv_export.py
@@ -0,0 +1,99 @@
+import csv
+from zipfile import ZipFile
+
+from django.core.files.temp import NamedTemporaryFile
+
+from onadata.libs.utils.common_tags import INDEX, PARENT_INDEX
+from onadata.libs.utils.export_builder import ExportBuilder
+
+
+class FlatCsvExportBuilder(ExportBuilder):
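+    """Exports submissions to a single flat CSV file, one row per
+    submission."""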
+
+ def __init__(self, xform, config):
+ super(FlatCsvExportBuilder, self).__init__(xform, config)
+
+ def export(self, path, data, username, id_string, filter_query):
+ # TODO resolve circular import
+ from onadata.apps.viewer.pandas_mongo_bridge import CSVDataFrameBuilder
+
+ csv_builder = CSVDataFrameBuilder(
+ username, id_string, filter_query, self.group_delimiter,
+ self.split_select_multiples, self.binary_select_multiples)
+ csv_builder.export_to(path)
+
+
+class ZippedCsvExportBuilder(ExportBuilder):
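+    """Exports the main table and each repeat section to its own CSV file
+    and bundles them into a zip archive at the given path."""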
+
+ def __init__(self, xform, config):
+        super(ZippedCsvExportBuilder, self).__init__(xform, config)
+
+    @classmethod
+    def write_row(cls, row, csv_writer, fields):
+ csv_writer.writerow(
+ [ExportBuilder.encode_if_str(row, field) for field in fields])
+
+ def export(self, path, data, *args):
+ csv_defs = {}
+ for section in self.sections:
+ csv_file = NamedTemporaryFile(suffix=".csv")
+ csv_writer = csv.writer(csv_file)
+ csv_defs[section['name']] = {
+ 'csv_file': csv_file, 'csv_writer': csv_writer}
+
+ # write headers
+ for section in self.sections:
+ fields = ([element['title'] for element in section['elements']] +
+ self.EXTRA_FIELDS)
+ csv_defs[section['name']]['csv_writer'].writerow(
+ [f.encode('utf-8') for f in fields])
+
+ indices = {}
+ survey_name = self.survey.name
+ for index, d in enumerate(data, 1):
+ # decode mongo section names
+ joined_export = ExportBuilder.dict_to_joined_export(
+ d, index, indices, survey_name)
+ output = ExportBuilder.decode_mongo_encoded_section_names(
+ joined_export)
+ # attach meta fields (index, parent_index, parent_table)
+ # output has keys for every section
+ if survey_name not in output:
+ output[survey_name] = {}
+ output[survey_name][INDEX] = index
+ output[survey_name][PARENT_INDEX] = -1
+ for section in self.sections:
+ # get data for this section and write to csv
+ section_name = section['name']
+ csv_def = csv_defs[section_name]
+ fields = [
+ element['xpath'] for element in
+ section['elements']] + self.EXTRA_FIELDS
+ csv_writer = csv_def['csv_writer']
+ # section name might not exist within the output, e.g. data was
+ # not provided for said repeat - write test to check this
+ row = output.get(section_name, None)
+ if type(row) == dict:
+ ZippedCsvExportBuilder.write_row(
+ self.pre_process_row(row, section),
+ csv_writer, fields)
+ elif type(row) == list:
+ for child_row in row:
+ ZippedCsvExportBuilder.write_row(
+ self.pre_process_row(child_row, section),
+ csv_writer, fields)
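+
+        # write zipfile
+        with ZipFile(path, 'w') as zip_file:
+            for section_name, csv_def in csv_defs.iteritems():
+                csv_file = csv_def['csv_file']
+                csv_file.seek(0)
+                zip_file.write(
+                    csv_file.name, "_".join(section_name.split("/")) + ".csv")
+
+        # close files when we are done
+        for section_name, csv_def in csv_defs.iteritems():
+            csv_def['csv_file'].close()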
diff --git a/onadata/libs/utils/export_builder.py b/onadata/libs/utils/export_builder.py
new file mode 100644
index 000000000..0817fec97
--- /dev/null
+++ b/onadata/libs/utils/export_builder.py
@@ -0,0 +1,349 @@
+import six
+
+from datetime import datetime, date
+from pyxform.question import Question
+from pyxform.section import Section, RepeatingSection
+
+from onadata.apps.viewer.models.parsed_instance import\
+ _is_invalid_for_mongo, _encode_for_mongo, _decode_from_mongo
+from onadata.libs.utils.common_tags import (
+ ID, XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION, BAMBOO_DATASET_ID,
+ DELETEDAT, INDEX, PARENT_INDEX, PARENT_TABLE_NAME,
+ SUBMISSION_TIME, UUID, TAGS, NOTES)
+
+QUESTION_TYPES_TO_EXCLUDE = [
+ u'note',
+]
+# the bind type of select multiples that we use to compare
+MULTIPLE_SELECT_BIND_TYPE = u"select"
+GEOPOINT_BIND_TYPE = u"geopoint"
+
+
+class ExportBuilder(object):
+ """A base class for export builders."""
+
+ IGNORED_COLUMNS = [XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION,
+ BAMBOO_DATASET_ID, DELETEDAT]
+ # fields we export but are not within the form's structure
+ EXTRA_FIELDS = [ID, UUID, SUBMISSION_TIME, INDEX, PARENT_TABLE_NAME,
+ PARENT_INDEX, TAGS, NOTES]
+
+ # column group delimiters
+ GROUP_DELIMITER_SLASH = '/'
+ GROUP_DELIMITER_DOT = '.'
+ GROUP_DELIMITERS = [GROUP_DELIMITER_SLASH, GROUP_DELIMITER_DOT]
+ TYPES_TO_CONVERT = ['int', 'decimal', 'date'] # , 'dateTime']
+ CONVERT_FUNCS = {
+ 'int': lambda x: int(x),
+ 'decimal': lambda x: float(x),
+ 'date': lambda x: datetime.strptime(x, "%d%m%Y").date(),
+ 'dateTime': lambda x: datetime.strptime(x[:19], '%Y-%m-%dT%H:%M:%S')
+ }
+ SHEET_NAME_MAX_CHARS = 31
+ SHEET_TITLE = 'export'
+
+ # Configuration options
+ group_delimiter = '/'
+ split_select_multiples = True
+ binary_select_multiples = False
+
+    def __init__(self, xform, config=None):
+        self.xform = xform
+        config = config or {}
+        self.group_delimiter = config.get(
+ 'group_delimiter', self.GROUP_DELIMITER_SLASH)
+ self.split_select_multiples = config.get(
+ 'split_select_multiples', True)
+ self.binary_select_multiples = config.get(
+ 'binary_select_multiples', False)
+
+ def export(self, path, data, *args):
+ raise NotImplementedError
+
+ def set_survey(self, survey):
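+        """Walks the survey and builds the section and column metadata
+        (select multiples, gps fields, encoded fields) used by export()."""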
+ # TODO resolve circular import
+ from onadata.apps.viewer.models.data_dictionary import\
+ DataDictionary
+
+ def build_sections(
+ current_section, survey_element, sections, select_multiples,
+ gps_fields, encoded_fields, field_delimiter='/'):
+ for child in survey_element.children:
+ current_section_name = current_section['name']
+                # if it's a section, recurse
+ if isinstance(child, Section):
+                    # if it's repeating, build a new section
+ if isinstance(child, RepeatingSection):
+ # section_name in recursive call changes
+ section = {
+ 'name': child.get_abbreviated_xpath(),
+ 'elements': []}
+ self.sections.append(section)
+ build_sections(
+ section, child, sections, select_multiples,
+ gps_fields, encoded_fields, field_delimiter)
+ else:
+                        # it's a group, recurse using the same section
+ build_sections(
+ current_section, child, sections, select_multiples,
+ gps_fields, encoded_fields, field_delimiter)
+ elif isinstance(child, Question) and child.bind.get(u"type")\
+ not in QUESTION_TYPES_TO_EXCLUDE:
+ # add to survey_sections
+ if isinstance(child, Question):
+ child_xpath = child.get_abbreviated_xpath()
+ current_section['elements'].append({
+ 'title': ExportBuilder.format_field_title(
+ child.get_abbreviated_xpath(),
+ field_delimiter),
+ 'xpath': child_xpath,
+ 'type': child.bind.get(u"type")
+ })
+
+ if _is_invalid_for_mongo(child_xpath):
+ if current_section_name not in encoded_fields:
+ encoded_fields[current_section_name] = {}
+ encoded_fields[current_section_name].update(
+ {child_xpath: _encode_for_mongo(child_xpath)})
+
+ # if its a select multiple, make columns out of its choices
+ if child.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE\
+ and self.split_select_multiples:
+ for c in child.children:
+ _xpath = c.get_abbreviated_xpath()
+ _title = ExportBuilder.format_field_title(
+ _xpath, field_delimiter)
+ choice = {
+ 'title': _title,
+ 'xpath': _xpath,
+ 'type': 'string'
+ }
+
+ if choice not in current_section['elements']:
+ current_section['elements'].append(choice)
+ _append_xpaths_to_section(
+ current_section_name, select_multiples,
+ child.get_abbreviated_xpath(),
+ [c.get_abbreviated_xpath()
+ for c in child.children])
+
+ # split gps fields within this section
+ if child.bind.get(u"type") == GEOPOINT_BIND_TYPE:
+ # add columns for geopoint components
+ xpaths = DataDictionary.get_additional_geopoint_xpaths(
+ child.get_abbreviated_xpath())
+ current_section['elements'].extend(
+ [
+ {
+ 'title': ExportBuilder.format_field_title(
+ xpath, field_delimiter),
+ 'xpath': xpath,
+ 'type': 'decimal'
+ }
+ for xpath in xpaths
+ ])
+ _append_xpaths_to_section(
+ current_section_name, gps_fields,
+ child.get_abbreviated_xpath(), xpaths)
+
+ def _append_xpaths_to_section(current_section_name, field_list, xpath,
+ xpaths):
+ if current_section_name not in field_list:
+ field_list[current_section_name] = {}
+ field_list[
+ current_section_name][xpath] = xpaths
+
+ self.survey = survey
+ self.select_multiples = {}
+ self.gps_fields = {}
+ self.encoded_fields = {}
+ main_section = {'name': survey.name, 'elements': []}
+ self.sections = [main_section]
+ build_sections(
+ main_section, self.survey, self.sections,
+ self.select_multiples, self.gps_fields, self.encoded_fields,
+ self.group_delimiter)
+
+ def section_by_name(self, name):
+ matches = filter(lambda s: s['name'] == name, self.sections)
+ assert(len(matches) == 1)
+ return matches[0]
+
+ def get_valid_sheet_name(self, desired_name, existing_names):
+ # a sheet name has to be <= 31 characters and not a duplicate of an
+ # existing sheet
+ # truncate sheet_name to SHEET_NAME_MAX_CHARS
+ new_sheet_name = desired_name[:self.SHEET_NAME_MAX_CHARS]
+
+ # make sure its unique within the list
+ i = 1
+ generated_name = new_sheet_name
+ while generated_name in existing_names:
+ digit_length = len(str(i))
+ allowed_name_len = self.SHEET_NAME_MAX_CHARS - digit_length
+ # make name the required len
+ if len(generated_name) > allowed_name_len:
+ generated_name = generated_name[:allowed_name_len]
+ generated_name = "{0}{1}".format(generated_name, i)
+ i += 1
+ return generated_name
+
+ @classmethod
+ def format_field_title(cls, abbreviated_xpath, field_delimiter):
+ if field_delimiter != '/':
+ return field_delimiter.join(abbreviated_xpath.split('/'))
+ return abbreviated_xpath
+
+    def do_split_select_multiples(self, row, select_multiples):
+ # for each select_multiple, get the associated data and split it
+ for xpath, choices in select_multiples.iteritems():
+ # get the data matching this xpath
+ data = row.get(xpath)
+ selections = []
+ if data:
+ selections = [
+ u'{0}/{1}'.format(
+ xpath, selection) for selection in data.split()]
+ if not self.binary_select_multiples:
+ row.update(dict(
+ [(choice, choice in selections if selections else None)
+ for choice in choices]))
+ else:
+ YES = 1
+ NO = 0
+ row.update(dict(
+ [(choice, YES if choice in selections else NO)
+ for choice in choices]))
+ return row
+
+ @classmethod
+ def split_gps_components(cls, row, gps_fields):
+ # for each gps_field, get associated data and split it
+ for xpath, gps_components in gps_fields.iteritems():
+ data = row.get(xpath)
+ if data:
+ gps_parts = data.split()
+ if len(gps_parts) > 0:
+ row.update(zip(gps_components, gps_parts))
+ return row
+
+ @classmethod
+ def decode_mongo_encoded_fields(cls, row, encoded_fields):
+ for xpath, encoded_xpath in encoded_fields.iteritems():
+ if row.get(encoded_xpath):
+ val = row.pop(encoded_xpath)
+ row.update({xpath: val})
+ return row
+
+ @classmethod
+ def decode_mongo_encoded_section_names(cls, data):
+ return dict([(_decode_from_mongo(k), v) for k, v in data.iteritems()])
+
+ @classmethod
+ def convert_type(cls, value, data_type):
+ """
+ Convert data to its native type e.g. string '1' to int 1
+ @param value: the string value to convert
+ @param data_type: the native data type to convert to
+ @return: the converted value
+ """
+ func = ExportBuilder.CONVERT_FUNCS.get(data_type, lambda x: x)
+ try:
+ return func(value)
+ except ValueError:
+ return value
+
+ def pre_process_row(self, row, section):
+ """
+ Split select multiples, gps and decode . and $
+ """
+ section_name = section['name']
+
+ # first decode fields so that subsequent lookups
+ # have decoded field names
+ if section_name in self.encoded_fields:
+ row = ExportBuilder.decode_mongo_encoded_fields(
+ row, self.encoded_fields[section_name])
+
+ if self.split_select_multiples and\
+ section_name in self.select_multiples:
+ row = self.do_split_select_multiples(
+ row, self.select_multiples[section_name])
+
+ if section_name in self.gps_fields:
+ row = ExportBuilder.split_gps_components(
+ row, self.gps_fields[section_name])
+
+ # convert to native types
+ for elm in section['elements']:
+ # only convert if its in our list and its not empty, just to
+ # optimize
+ value = row.get(elm['xpath'])
+ if elm['type'] in ExportBuilder.TYPES_TO_CONVERT\
+ and value is not None and value != '':
+ row[elm['xpath']] = ExportBuilder.convert_type(
+ value, elm['type'])
+
+ return row
+
+ @classmethod
+ def dict_to_joined_export(cls, data, index, indices, name):
+ """
+ Converts a dict into one or more tabular datasets
+ """
+ output = {}
+ # TODO: test for _geolocation and attachment lists
+ if isinstance(data, dict):
+ for key, val in data.iteritems():
+ if isinstance(val, list) and key not in [NOTES, TAGS]:
+ output[key] = []
+ for child in val:
+ if key not in indices:
+ indices[key] = 0
+ indices[key] += 1
+ child_index = indices[key]
+ new_output = cls.dict_to_joined_export(
+ child, child_index, indices, key)
+ d = {INDEX: child_index, PARENT_INDEX: index,
+ PARENT_TABLE_NAME: name}
+ # iterate over keys within new_output and append to
+ # main output
+ for out_key, out_val in new_output.iteritems():
+ if isinstance(out_val, list):
+ if out_key not in output:
+ output[out_key] = []
+ output[out_key].extend(out_val)
+ else:
+ d.update(out_val)
+ output[key].append(d)
+ else:
+ if name not in output:
+ output[name] = {}
+ if key in [TAGS]:
+ output[name][key] = ",".join(val)
+ elif key in [NOTES]:
+ output[name][key] = "\r\n".join(
+ [v['note'] for v in val])
+ else:
+ output[name][key] = val
+
+ return output
+
+ @classmethod
+ def encode_if_str(cls, row, key, encode_dates=False):
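+        """Returns row[key] encoded as utf-8 if it is a string, optionally
+        encoding date values; other values are returned unchanged."""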
+ val = row.get(key)
+
+ if isinstance(val, six.string_types):
+ return val.encode('utf-8')
+
+ if encode_dates and isinstance(val, datetime):
+ return val.strftime('%Y-%m-%dT%H:%M:%S%z').encode('utf-8')
+
+ if encode_dates and isinstance(val, date):
+ return val.strftime('%Y-%m-%d').encode('utf-8')
+
+ return val
diff --git a/onadata/libs/utils/export_tools.py b/onadata/libs/utils/export_tools.py
index 44dfa4d62..c0e0dca2f 100644
--- a/onadata/libs/utils/export_tools.py
+++ b/onadata/libs/utils/export_tools.py
@@ -1,12 +1,10 @@
-import csv
from datetime import datetime, date
import json
import os
import re
import six
from urlparse import urlparse
-from zipfile import ZipFile
-
+
from bson import json_util
from django.conf import settings
from django.core.files.base import File
@@ -14,52 +12,28 @@
from django.core.files.storage import get_storage_class
from django.contrib.auth.models import User
from django.shortcuts import render_to_response
-from openpyxl.date_time import SharedDate
-from openpyxl.workbook import Workbook
-from pyxform.question import Question
-from pyxform.section import Section, RepeatingSection
-from savReaderWriter import SavWriter
from json2xlsclient.client import Client
from onadata.apps.logger.models import Attachment, Instance, XForm
from onadata.apps.main.models.meta_data import MetaData
from onadata.apps.viewer.models.export import Export
-from onadata.apps.viewer.models.parsed_instance import\
- _is_invalid_for_mongo, _encode_for_mongo, dict_for_mongo,\
- _decode_from_mongo
-from onadata.libs.utils.viewer_tools import create_attachments_zipfile,\
- image_urls
+from onadata.apps.viewer.models.parsed_instance import dict_for_mongo
+
+from onadata.libs.utils.viewer_tools import create_attachments_zipfile
from onadata.libs.utils.common_tags import (
- ID, XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION, BAMBOO_DATASET_ID,
- DELETEDAT, USERFORM_ID, INDEX, PARENT_INDEX, PARENT_TABLE_NAME,
- SUBMISSION_TIME, UUID, TAGS, NOTES)
+ USERFORM_ID, INDEX, PARENT_INDEX, PARENT_TABLE_NAME, TAGS, NOTES)
from onadata.libs.exceptions import J2XException
-
-
+from onadata.libs.utils.google_sheets import SheetsExportBuilder
+from onadata.libs.utils.csv_export import FlatCsvExportBuilder, ZippedCsvExportBuilder
+from onadata.libs.utils.sav_export import ZippedSavExportBuilder
+from onadata.libs.utils.xls_export import XlsExportBuilder
+
# this is Mongo Collection where we will store the parsed submissions
xform_instances = settings.MONGO_DB.instances
QUESTION_TYPES_TO_EXCLUDE = [
u'note',
]
-# the bind type of select multiples that we use to compare
-MULTIPLE_SELECT_BIND_TYPE = u"select"
-GEOPOINT_BIND_TYPE = u"geopoint"
-
-
-def encode_if_str(row, key, encode_dates=False):
- val = row.get(key)
-
- if isinstance(val, six.string_types):
- return val.encode('utf-8')
-
- if encode_dates and isinstance(val, datetime):
- return val.strftime('%Y-%m-%dT%H:%M:%S%z').encode('utf-8')
-
- if encode_dates and isinstance(val, date):
- return val.strftime('%Y-%m-%d').encode('utf-8')
-
- return val
def question_types_to_exclude(_type):
@@ -126,566 +100,22 @@ def get_observation_from_dict(self, d):
return result
-def dict_to_joined_export(data, index, indices, name):
- """
- Converts a dict into one or more tabular datasets
- """
- output = {}
- # TODO: test for _geolocation and attachment lists
- if isinstance(data, dict):
- for key, val in data.iteritems():
- if isinstance(val, list) and key not in [NOTES, TAGS]:
- output[key] = []
- for child in val:
- if key not in indices:
- indices[key] = 0
- indices[key] += 1
- child_index = indices[key]
- new_output = dict_to_joined_export(
- child, child_index, indices, key)
- d = {INDEX: child_index, PARENT_INDEX: index,
- PARENT_TABLE_NAME: name}
- # iterate over keys within new_output and append to
- # main output
- for out_key, out_val in new_output.iteritems():
- if isinstance(out_val, list):
- if out_key not in output:
- output[out_key] = []
- output[out_key].extend(out_val)
- else:
- d.update(out_val)
- output[key].append(d)
- else:
- if name not in output:
- output[name] = {}
- if key in [TAGS]:
- output[name][key] = ",".join(val)
- elif key in [NOTES]:
- output[name][key] = "\r\n".join(
- [v['note'] for v in val])
- else:
- output[name][key] = val
-
- return output
-
-
-class ExportBuilder(object):
- IGNORED_COLUMNS = [XFORM_ID_STRING, STATUS, ATTACHMENTS, GEOLOCATION,
- BAMBOO_DATASET_ID, DELETEDAT]
- # fields we export but are not within the form's structure
- EXTRA_FIELDS = [ID, UUID, SUBMISSION_TIME, INDEX, PARENT_TABLE_NAME,
- PARENT_INDEX, TAGS, NOTES]
- SPLIT_SELECT_MULTIPLES = True
- BINARY_SELECT_MULTIPLES = False
-
- # column group delimiters
- GROUP_DELIMITER_SLASH = '/'
- GROUP_DELIMITER_DOT = '.'
- GROUP_DELIMITER = GROUP_DELIMITER_SLASH
- GROUP_DELIMITERS = [GROUP_DELIMITER_SLASH, GROUP_DELIMITER_DOT]
- TYPES_TO_CONVERT = ['int', 'decimal', 'date'] # , 'dateTime']
- CONVERT_FUNCS = {
- 'int': lambda x: int(x),
- 'decimal': lambda x: float(x),
- 'date': lambda x: ExportBuilder.string_to_date_with_xls_validation(x),
- 'dateTime': lambda x: datetime.strptime(x[:19], '%Y-%m-%dT%H:%M:%S')
- }
-
- XLS_SHEET_NAME_MAX_CHARS = 31
-
- @classmethod
- def string_to_date_with_xls_validation(cls, date_str):
- date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
- try:
- SharedDate().datetime_to_julian(date_obj)
- except ValueError:
- return date_str
- else:
- return date_obj
-
- @classmethod
- def format_field_title(cls, abbreviated_xpath, field_delimiter):
- if field_delimiter != '/':
- return field_delimiter.join(abbreviated_xpath.split('/'))
- return abbreviated_xpath
-
- def set_survey(self, survey):
- # TODO resolve circular import
- from onadata.apps.viewer.models.data_dictionary import\
- DataDictionary
-
- def build_sections(
- current_section, survey_element, sections, select_multiples,
- gps_fields, encoded_fields, field_delimiter='/'):
- for child in survey_element.children:
- current_section_name = current_section['name']
- # if a section, recurs
- if isinstance(child, Section):
- # if its repeating, build a new section
- if isinstance(child, RepeatingSection):
- # section_name in recursive call changes
- section = {
- 'name': child.get_abbreviated_xpath(),
- 'elements': []}
- self.sections.append(section)
- build_sections(
- section, child, sections, select_multiples,
- gps_fields, encoded_fields, field_delimiter)
- else:
- # its a group, recurs using the same section
- build_sections(
- current_section, child, sections, select_multiples,
- gps_fields, encoded_fields, field_delimiter)
- elif isinstance(child, Question) and child.bind.get(u"type")\
- not in QUESTION_TYPES_TO_EXCLUDE:
- # add to survey_sections
- if isinstance(child, Question):
- child_xpath = child.get_abbreviated_xpath()
- current_section['elements'].append({
- 'title': ExportBuilder.format_field_title(
- child.get_abbreviated_xpath(),
- field_delimiter),
- 'xpath': child_xpath,
- 'type': child.bind.get(u"type")
- })
-
- if _is_invalid_for_mongo(child_xpath):
- if current_section_name not in encoded_fields:
- encoded_fields[current_section_name] = {}
- encoded_fields[current_section_name].update(
- {child_xpath: _encode_for_mongo(child_xpath)})
-
- # if its a select multiple, make columns out of its choices
- if child.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE\
- and self.SPLIT_SELECT_MULTIPLES:
- for c in child.children:
- _xpath = c.get_abbreviated_xpath()
- _title = ExportBuilder.format_field_title(
- _xpath, field_delimiter)
- choice = {
- 'title': _title,
- 'xpath': _xpath,
- 'type': 'string'
- }
-
- if choice not in current_section['elements']:
- current_section['elements'].append(choice)
- _append_xpaths_to_section(
- current_section_name, select_multiples,
- child.get_abbreviated_xpath(),
- [c.get_abbreviated_xpath()
- for c in child.children])
-
- # split gps fields within this section
- if child.bind.get(u"type") == GEOPOINT_BIND_TYPE:
- # add columns for geopoint components
- xpaths = DataDictionary.get_additional_geopoint_xpaths(
- child.get_abbreviated_xpath())
- current_section['elements'].extend(
- [
- {
- 'title': ExportBuilder.format_field_title(
- xpath, field_delimiter),
- 'xpath': xpath,
- 'type': 'decimal'
- }
- for xpath in xpaths
- ])
- _append_xpaths_to_section(
- current_section_name, gps_fields,
- child.get_abbreviated_xpath(), xpaths)
-
- def _append_xpaths_to_section(current_section_name, field_list, xpath,
- xpaths):
- if current_section_name not in field_list:
- field_list[current_section_name] = {}
- field_list[
- current_section_name][xpath] = xpaths
-
- self.survey = survey
- self.select_multiples = {}
- self.gps_fields = {}
- self.encoded_fields = {}
- main_section = {'name': survey.name, 'elements': []}
- self.sections = [main_section]
- build_sections(
- main_section, self.survey, self.sections,
- self.select_multiples, self.gps_fields, self.encoded_fields,
- self.GROUP_DELIMITER)
-
- def section_by_name(self, name):
- matches = filter(lambda s: s['name'] == name, self.sections)
- assert(len(matches) == 1)
- return matches[0]
-
- @classmethod
- def split_select_multiples(cls, row, select_multiples):
- # for each select_multiple, get the associated data and split it
- for xpath, choices in select_multiples.iteritems():
- # get the data matching this xpath
- data = row.get(xpath)
- selections = []
- if data:
- selections = [
- u'{0}/{1}'.format(
- xpath, selection) for selection in data.split()]
- if not cls.BINARY_SELECT_MULTIPLES:
- row.update(dict(
- [(choice, choice in selections if selections else None)
- for choice in choices]))
- else:
- YES = 1
- NO = 0
- row.update(dict(
- [(choice, YES if choice in selections else NO)
- for choice in choices]))
- return row
-
- @classmethod
- def split_gps_components(cls, row, gps_fields):
- # for each gps_field, get associated data and split it
- for xpath, gps_components in gps_fields.iteritems():
- data = row.get(xpath)
- if data:
- gps_parts = data.split()
- if len(gps_parts) > 0:
- row.update(zip(gps_components, gps_parts))
- return row
-
- @classmethod
- def decode_mongo_encoded_fields(cls, row, encoded_fields):
- for xpath, encoded_xpath in encoded_fields.iteritems():
- if row.get(encoded_xpath):
- val = row.pop(encoded_xpath)
- row.update({xpath: val})
- return row
-
- @classmethod
- def decode_mongo_encoded_section_names(cls, data):
- return dict([(_decode_from_mongo(k), v) for k, v in data.iteritems()])
-
- @classmethod
- def convert_type(cls, value, data_type):
- """
- Convert data to its native type e.g. string '1' to int 1
- @param value: the string value to convert
- @param data_type: the native data type to convert to
- @return: the converted value
- """
- func = ExportBuilder.CONVERT_FUNCS.get(data_type, lambda x: x)
- try:
- return func(value)
- except ValueError:
- return value
-
- def pre_process_row(self, row, section):
- """
- Split select multiples, gps and decode . and $
- """
- section_name = section['name']
-
- # first decode fields so that subsequent lookups
- # have decoded field names
- if section_name in self.encoded_fields:
- row = ExportBuilder.decode_mongo_encoded_fields(
- row, self.encoded_fields[section_name])
-
- if self.SPLIT_SELECT_MULTIPLES and\
- section_name in self.select_multiples:
- row = ExportBuilder.split_select_multiples(
- row, self.select_multiples[section_name])
-
- if section_name in self.gps_fields:
- row = ExportBuilder.split_gps_components(
- row, self.gps_fields[section_name])
-
- # convert to native types
- for elm in section['elements']:
- # only convert if its in our list and its not empty, just to
- # optimize
- value = row.get(elm['xpath'])
- if elm['type'] in ExportBuilder.TYPES_TO_CONVERT\
- and value is not None and value != '':
- row[elm['xpath']] = ExportBuilder.convert_type(
- value, elm['type'])
-
- return row
-
- def to_zipped_csv(self, path, data, *args):
- def write_row(row, csv_writer, fields):
- csv_writer.writerow(
- [encode_if_str(row, field) for field in fields])
-
- csv_defs = {}
- for section in self.sections:
- csv_file = NamedTemporaryFile(suffix=".csv")
- csv_writer = csv.writer(csv_file)
- csv_defs[section['name']] = {
- 'csv_file': csv_file, 'csv_writer': csv_writer}
-
- # write headers
- for section in self.sections:
- fields = [element['title'] for element in section['elements']]\
- + self.EXTRA_FIELDS
- csv_defs[section['name']]['csv_writer'].writerow(
- [f.encode('utf-8') for f in fields])
-
- index = 1
- indices = {}
- survey_name = self.survey.name
- for d in data:
- # decode mongo section names
- joined_export = dict_to_joined_export(d, index, indices,
- survey_name)
- output = ExportBuilder.decode_mongo_encoded_section_names(
- joined_export)
- # attach meta fields (index, parent_index, parent_table)
- # output has keys for every section
- if survey_name not in output:
- output[survey_name] = {}
- output[survey_name][INDEX] = index
- output[survey_name][PARENT_INDEX] = -1
- for section in self.sections:
- # get data for this section and write to csv
- section_name = section['name']
- csv_def = csv_defs[section_name]
- fields = [
- element['xpath'] for element in
- section['elements']] + self.EXTRA_FIELDS
- csv_writer = csv_def['csv_writer']
- # section name might not exist within the output, e.g. data was
- # not provided for said repeat - write test to check this
- row = output.get(section_name, None)
- if type(row) == dict:
- write_row(
- self.pre_process_row(row, section),
- csv_writer, fields)
- elif type(row) == list:
- for child_row in row:
- write_row(
- self.pre_process_row(child_row, section),
- csv_writer, fields)
- index += 1
-
- # write zipfile
- with ZipFile(path, 'w') as zip_file:
- for section_name, csv_def in csv_defs.iteritems():
- csv_file = csv_def['csv_file']
- csv_file.seek(0)
- zip_file.write(
- csv_file.name, "_".join(section_name.split("/")) + ".csv")
-
- # close files when we are done
- for section_name, csv_def in csv_defs.iteritems():
- csv_def['csv_file'].close()
-
- @classmethod
- def get_valid_sheet_name(cls, desired_name, existing_names):
- # a sheet name has to be <= 31 characters and not a duplicate of an
- # existing sheet
- # truncate sheet_name to XLSDataFrameBuilder.SHEET_NAME_MAX_CHARS
- new_sheet_name = \
- desired_name[:cls.XLS_SHEET_NAME_MAX_CHARS]
-
- # make sure its unique within the list
- i = 1
- generated_name = new_sheet_name
- while generated_name in existing_names:
- digit_length = len(str(i))
- allowed_name_len = cls.XLS_SHEET_NAME_MAX_CHARS - \
- digit_length
- # make name the required len
- if len(generated_name) > allowed_name_len:
- generated_name = generated_name[:allowed_name_len]
- generated_name = "{0}{1}".format(generated_name, i)
- i += 1
- return generated_name
-
- def to_xls_export(self, path, data, *args):
- def write_row(data, work_sheet, fields, work_sheet_titles):
- # update parent_table with the generated sheet's title
- data[PARENT_TABLE_NAME] = work_sheet_titles.get(
- data.get(PARENT_TABLE_NAME))
- work_sheet.append([data.get(f) for f in fields])
-
- wb = Workbook(optimized_write=True)
- work_sheets = {}
- # map of section_names to generated_names
- work_sheet_titles = {}
- for section in self.sections:
- section_name = section['name']
- work_sheet_title = ExportBuilder.get_valid_sheet_name(
- "_".join(section_name.split("/")), work_sheet_titles.values())
- work_sheet_titles[section_name] = work_sheet_title
- work_sheets[section_name] = wb.create_sheet(
- title=work_sheet_title)
-
- # write the headers
- for section in self.sections:
- section_name = section['name']
- headers = [
- element['title'] for element in
- section['elements']] + self.EXTRA_FIELDS
- # get the worksheet
- ws = work_sheets[section_name]
- ws.append(headers)
-
- index = 1
- indices = {}
- survey_name = self.survey.name
- for d in data:
- joined_export = dict_to_joined_export(d, index, indices,
- survey_name)
- output = ExportBuilder.decode_mongo_encoded_section_names(
- joined_export)
- # attach meta fields (index, parent_index, parent_table)
- # output has keys for every section
- if survey_name not in output:
- output[survey_name] = {}
- output[survey_name][INDEX] = index
- output[survey_name][PARENT_INDEX] = -1
- for section in self.sections:
- # get data for this section and write to xls
- section_name = section['name']
- fields = [
- element['xpath'] for element in
- section['elements']] + self.EXTRA_FIELDS
-
- ws = work_sheets[section_name]
- # section might not exist within the output, e.g. data was
- # not provided for said repeat - write test to check this
- row = output.get(section_name, None)
- if type(row) == dict:
- write_row(
- self.pre_process_row(row, section),
- ws, fields, work_sheet_titles)
- elif type(row) == list:
- for child_row in row:
- write_row(
- self.pre_process_row(child_row, section),
- ws, fields, work_sheet_titles)
- index += 1
-
- wb.save(filename=path)
-
- def to_flat_csv_export(
- self, path, data, username, id_string, filter_query):
- # TODO resolve circular import
- from onadata.apps.viewer.pandas_mongo_bridge import\
- CSVDataFrameBuilder
-
- csv_builder = CSVDataFrameBuilder(
- username, id_string, filter_query, self.GROUP_DELIMITER,
- self.SPLIT_SELECT_MULTIPLES, self.BINARY_SELECT_MULTIPLES)
- csv_builder.export_to(path)
-
- def to_zipped_sav(self, path, data, *args):
- def write_row(row, csv_writer, fields):
- sav_writer.writerow(
- [encode_if_str(row, field, True) for field in fields])
-
- sav_defs = {}
-
- # write headers
- for section in self.sections:
- fields = [element['title'] for element in section['elements']]\
- + self.EXTRA_FIELDS
- c = 0
- var_labels = {}
- var_names = []
- tmp_k = {}
- for field in fields:
- c += 1
- var_name = 'var%d' % c
- var_labels[var_name] = field
- var_names.append(var_name)
- tmp_k[field] = var_name
-
- var_types = dict(
- [(tmp_k[element['title']],
- 0 if element['type'] in ['decimal', 'int'] else 255)
- for element in section['elements']]
- + [(tmp_k[item],
- 0 if item in ['_id', '_index', '_parent_index'] else 255)
- for item in self.EXTRA_FIELDS]
- )
- sav_file = NamedTemporaryFile(suffix=".sav")
- sav_writer = SavWriter(sav_file.name, varNames=var_names,
- varTypes=var_types,
- varLabels=var_labels, ioUtf8=True)
- sav_defs[section['name']] = {
- 'sav_file': sav_file, 'sav_writer': sav_writer}
-
- index = 1
- indices = {}
- survey_name = self.survey.name
- for d in data:
- # decode mongo section names
- joined_export = dict_to_joined_export(d, index, indices,
- survey_name)
- output = ExportBuilder.decode_mongo_encoded_section_names(
- joined_export)
- # attach meta fields (index, parent_index, parent_table)
- # output has keys for every section
- if survey_name not in output:
- output[survey_name] = {}
- output[survey_name][INDEX] = index
- output[survey_name][PARENT_INDEX] = -1
- for section in self.sections:
- # get data for this section and write to csv
- section_name = section['name']
- sav_def = sav_defs[section_name]
- fields = [
- element['xpath'] for element in
- section['elements']] + self.EXTRA_FIELDS
- sav_writer = sav_def['sav_writer']
- row = output.get(section_name, None)
- if type(row) == dict:
- write_row(
- self.pre_process_row(row, section),
- sav_writer, fields)
- elif type(row) == list:
- for child_row in row:
- write_row(
- self.pre_process_row(child_row, section),
- sav_writer, fields)
- index += 1
-
- for section_name, sav_def in sav_defs.iteritems():
- sav_def['sav_writer'].closeSavFile(
- sav_def['sav_writer'].fh, mode='wb')
-
- # write zipfile
- with ZipFile(path, 'w') as zip_file:
- for section_name, sav_def in sav_defs.iteritems():
- sav_file = sav_def['sav_file']
- sav_file.seek(0)
- zip_file.write(
- sav_file.name, "_".join(section_name.split("/")) + ".sav")
-
- # close files when we are done
- for section_name, sav_def in sav_defs.iteritems():
- sav_def['sav_file'].close()
-
-
-def dict_to_flat_export(d, parent_index=0):
- pass
-
-
def generate_export(export_type, extension, username, id_string,
export_id=None, filter_query=None, group_delimiter='/',
- split_select_multiples=True,
- binary_select_multiples=False):
+ split_select_multiples=True, binary_select_multiples=False,
+ google_token=None, flatten_repeated_fields=True,
+ export_xlsform=True):
"""
Create appropriate export object given the export type
"""
# TODO resolve circular import
from onadata.apps.viewer.models.export import Export
- export_type_func_map = {
- Export.XLS_EXPORT: 'to_xls_export',
- Export.CSV_EXPORT: 'to_flat_csv_export',
- Export.CSV_ZIP_EXPORT: 'to_zipped_csv',
- Export.SAV_ZIP_EXPORT: 'to_zipped_sav',
+ export_type_class_map = {
+ Export.XLS_EXPORT: XlsExportBuilder,
+ Export.GSHEETS_EXPORT: SheetsExportBuilder,
+ Export.CSV_EXPORT: FlatCsvExportBuilder,
+ Export.CSV_ZIP_EXPORT: ZippedCsvExportBuilder,
+ Export.SAV_ZIP_EXPORT: ZippedSavExportBuilder,
}
xform = XForm.objects.get(
@@ -694,24 +124,33 @@ def generate_export(export_type, extension, username, id_string,
# query mongo for the cursor
records = query_mongo(username, id_string, filter_query)
- export_builder = ExportBuilder()
- export_builder.GROUP_DELIMITER = group_delimiter
- export_builder.SPLIT_SELECT_MULTIPLES = split_select_multiples
- export_builder.BINARY_SELECT_MULTIPLES = binary_select_multiples
+ spreadsheet_title = "%s_%s" % (id_string,
+ datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
+ config = {}
+ config['group_delimiter'] = group_delimiter
+ config['split_select_multiples'] = split_select_multiples
+ config['binary_select_multiples'] = binary_select_multiples
+ if export_type == Export.GSHEETS_EXPORT:
+ config['spreadsheet_title'] = spreadsheet_title
+ config['google_token'] = google_token
+ config['flatten_repeated_fields'] = flatten_repeated_fields
+ config['export_xlsform'] = export_xlsform
+ export_builder = export_type_class_map[export_type](xform, config)
export_builder.set_survey(xform.data_dictionary().survey)
-
- temp_file = NamedTemporaryFile(suffix=("." + extension))
-
- # get the export function by export type
- func = getattr(export_builder, export_type_func_map[export_type])
-
- func.__call__(
+
+ if extension:
+ temp_file = NamedTemporaryFile(suffix=("." + extension))
+ else:
+ temp_file = NamedTemporaryFile(delete=False)
+
+ # run the export
+ export_builder.export(
temp_file.name, records, username, id_string, filter_query)
# generate filename
- basename = "%s_%s" % (
- id_string, datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
- filename = basename + "." + extension
+ filename = spreadsheet_title
+ if extension:
+ filename = filename + "." + extension
# check filename is unique
while not Export.is_filename_unique(xform, filename):
@@ -743,6 +183,10 @@ def generate_export(export_type, extension, username, id_string,
export.filedir = dir_name
export.filename = basename
export.internal_status = Export.SUCCESSFUL
+ # Get URL of the exported sheet.
+ if export_type == Export.GSHEETS_EXPORT:
+ export.export_url = export_builder.url
+
# dont persist exports that have a filter
if filter_query is None:
export.save()
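
Taken together, these changes replace the per-type export functions with builder classes that share a constructor signature and an export() entry point. A minimal sketch of the new flow, assuming `xform`, `records`, `username` and `id_string` are already in scope (the temp-file name is illustrative):

    from django.core.files.temp import NamedTemporaryFile
    from onadata.libs.utils.xls_export import XlsExportBuilder

    config = {
        'group_delimiter': '/',
        'split_select_multiples': True,
        'binary_select_multiples': False,
    }
    builder = XlsExportBuilder(xform, config)
    builder.set_survey(xform.data_dictionary().survey)
    temp_file = NamedTemporaryFile(suffix='.xls')
    # Same call signature for every builder class in the map above.
    builder.export(temp_file.name, records, username, id_string, None)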
diff --git a/onadata/libs/utils/google.py b/onadata/libs/utils/google.py
index 52864e971..3eb6075ba 100644
--- a/onadata/libs/utils/google.py
+++ b/onadata/libs/utils/google.py
@@ -3,12 +3,9 @@
import urllib
import urllib2
-import gdata
+# TODO: gdata is deprecated. For OAuth2 authentication it should be replaced
+# by oauth2client.
import gdata.gauth
-import gdata.docs
-import gdata.data
-import gdata.docs.client
-import gdata.docs.data
from django.conf import settings
@@ -17,7 +14,8 @@
client_secret=settings.GOOGLE_CLIENT_SECRET,
scope=' '.join(
['https://docs.google.com/feeds/',
- 'https://spreadsheets.google.com/feeds/']),
+ 'https://spreadsheets.google.com/feeds/',
+ 'https://www.googleapis.com/auth/drive.file']),
user_agent='formhub')
redirect_uri = oauth2_token.generate_authorize_url(
@@ -41,20 +39,4 @@ def get_refreshed_token(token):
token.access_token = tokens['access_token']
return token
-
-def google_export_xls(filename, title, token, blob=True):
- if blob:
- token = gdata.gauth.token_from_blob(token)
- if token.refresh_token is not None \
- and token.access_token is not None:
- oauth2_token.refresh_token = token.refresh_token
- working_token = get_refreshed_token(oauth2_token)
- docs_client = gdata.docs.client.DocsClient(
- source=oauth2_token.user_agent)
- docs_client = working_token.authorize(docs_client)
- xls_doc = gdata.docs.data.Resource(
- type='spreadsheet', title=title)
- media = gdata.data.MediaSource()
- media.SetFileHandle(filename, 'application/vnd.ms-excel')
- xls_doc = docs_client.CreateResource(xls_doc, media=media)
- return xls_doc.find_html_link()
+
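
As the TODO above notes, gdata's OAuth2 support is deprecated. A possible oauth2client-based replacement for the token refresh, sketched here as an untested illustration (the helper name is hypothetical; oauth2client is pinned in the requirements below):

    import httplib2
    from django.conf import settings
    from oauth2client.client import OAuth2Credentials

    def refresh_access_token(refresh_token):
        # Exchange a long-lived refresh token for a fresh access token.
        credentials = OAuth2Credentials(
            access_token=None,
            client_id=settings.GOOGLE_CLIENT_ID,
            client_secret=settings.GOOGLE_CLIENT_SECRET,
            refresh_token=refresh_token,
            token_expiry=None,
            token_uri='https://accounts.google.com/o/oauth2/token',
            user_agent='formhub')
        credentials.refresh(httplib2.Http())
        return credentials.access_token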
diff --git a/onadata/libs/utils/google_sheets.py b/onadata/libs/utils/google_sheets.py
new file mode 100644
index 000000000..ee8c2806e
--- /dev/null
+++ b/onadata/libs/utils/google_sheets.py
@@ -0,0 +1,308 @@
+"""
+This module contains classes responsible for communicating with
+Google Data API and common spreadsheets models.
+"""
+import csv
+import gdata.gauth
+import gspread
+import io
+import json
+import xlrd
+
+from django.conf import settings
+from django.core.files.storage import get_storage_class
+from oauth2client.client import SignedJwtAssertionCredentials
+from onadata.koboform.pyxform_utils import convert_csv_to_xls
+from onadata.libs.utils.google import get_refreshed_token
+from onadata.libs.utils.export_builder import ExportBuilder
+from onadata.libs.utils.common_tags import (
+    INDEX, PARENT_INDEX, PARENT_TABLE_NAME)
+
+
+def update_row(worksheet, index, values):
+ """"Adds a row to the worksheet at the specified index and populates it with values.
+ Widens the worksheet if there are more values than columns.
+ :param worksheet: The worksheet to be updated.
+ :param index: Index of the row to be updated.
+ :param values: List of values for the row.
+ """
+ data_width = len(values)
+ if worksheet.col_count < data_width:
+ worksheet.resize(cols=data_width)
+
+ cell_list = []
+ for i, value in enumerate(values, start=1):
+ cell = worksheet.cell(index, i)
+ cell.value = value
+ cell_list.append(cell)
+
+ worksheet.update_cells(cell_list)
+
+
+def xlrd_format_value(cell):
+    """A helper function to format the value of a cell.
+    xlrd stores numbers as floats, which means that the cell value
+    42 in Excel is returned as 42.0 in Python. This function tries to guess
+    if the original value was an integer and returns the proper type.
+ """
+ value = cell.value
+ if cell.ctype == xlrd.XL_CELL_NUMBER and int(value) == value:
+ value = int(value)
+ return value
+
+
+class SheetsClient(gspread.client.Client):
+ """An instance of this class communicates with Google Data API."""
+
+ AUTH_SCOPE = ' '.join(['https://docs.google.com/feeds/',
+ 'https://spreadsheets.google.com/feeds/',
+ 'https://www.googleapis.com/auth/drive.file'])
+
+ DRIVE_API_URL = 'https://www.googleapis.com/drive/v2/files'
+
+    def new(self, title):
+        """Creates a blank spreadsheet via the Drive API and opens it."""
+        headers = {'Content-Type': 'application/json'}
+ data = {
+ 'title': title,
+ 'mimeType': 'application/vnd.google-apps.spreadsheet'
+ }
+ r = self.session.request(
+            'POST', SheetsClient.DRIVE_API_URL, headers=headers,
+            data=json.dumps(data))
+ resp = json.loads(r.read().decode('utf-8'))
+ sheet_id = resp['id']
+ return self.open_by_key(sheet_id)
+
+    def add_service_account_to_spreadsheet(self, spreadsheet):
+        """Grants the service account write access to the spreadsheet."""
+        url = '%s/%s/permissions' % (
+            SheetsClient.DRIVE_API_URL, spreadsheet.id)
+        headers = {'Content-Type': 'application/json'}
+ data = {
+ 'role': 'writer',
+ 'type': 'user',
+ 'value': settings.GOOGLE_CLIENT_EMAIL
+ }
+
+ self.session.request(
+ 'POST', url, headers=headers, data=json.dumps(data))
+
+ @classmethod
+ def login_with_service_account(cls):
+        credential = SignedJwtAssertionCredentials(
+            settings.GOOGLE_CLIENT_EMAIL,
+            settings.GOOGLE_CLIENT_PRIVATE_KEY,
+            scope=SheetsClient.AUTH_SCOPE)
+
+ client = SheetsClient(auth=credential)
+ client.login()
+ return client
+
+ @classmethod
+ def login_with_auth_token(cls, token_string):
+ # deserialize the token.
+ token = gdata.gauth.token_from_blob(token_string)
+ assert token.refresh_token
+
+ # Refresh OAuth token if necessary.
+ oauth2_token = gdata.gauth.OAuth2Token(
+ client_id=settings.GOOGLE_CLIENT_ID,
+ client_secret=settings.GOOGLE_CLIENT_SECRET,
+ scope=SheetsClient.AUTH_SCOPE,
+ user_agent='formhub')
+ oauth2_token.refresh_token = token.refresh_token
+ refreshed_token = get_refreshed_token(oauth2_token)
+
+ # Create Google Sheet.
+ client = SheetsClient(auth=refreshed_token)
+ client.login()
+ return client
+
+
+class SheetsExportBuilder(ExportBuilder):
+ client = None
+ spreadsheet = None
+    # Worksheets generated by this builder, keyed by section name.
+    worksheets = None
+    # Map of section names to generated worksheet titles.
+    worksheet_titles = None
+ # The URL of the exported sheet.
+ url = None
+
+ # Configuration options
+ spreadsheet_title = None
+ flatten_repeated_fields = True
+ export_xlsform = True
+ google_token = None
+
+ # Constants
+ SHEETS_BASE_URL = 'https://docs.google.com/spreadsheet/ccc?key=%s&hl'
+ FLATTENED_SHEET_TITLE = 'raw'
+
+    def __init__(self, xform, config):
+        super(SheetsExportBuilder, self).__init__(xform, config)
+        # Use per-instance dicts; mutable class attributes would be shared
+        # across instances.
+        self.worksheets = {}
+        self.worksheet_titles = {}
+        self.spreadsheet_title = config['spreadsheet_title']
+        self.google_token = config['google_token']
+        self.flatten_repeated_fields = config['flatten_repeated_fields']
+        self.export_xlsform = config['export_xlsform']
+
+ def export(self, path, data, username, id_string, filter_query):
+ self.client = SheetsClient.login_with_auth_token(self.google_token)
+
+ # Create a new sheet
+ self.spreadsheet = self.client.new(title=self.spreadsheet_title)
+ self.url = self.SHEETS_BASE_URL % self.spreadsheet.id
+
+ # Add Service account as editor
+ self.client.add_service_account_to_spreadsheet(self.spreadsheet)
+
+ # Perform the actual export
+ if self.flatten_repeated_fields:
+ self.export_flattened(path, data, username, id_string, filter_query)
+ else:
+ self.export_tabular(path, data)
+
+ # Write XLSForm data
+ if self.export_xlsform:
+ self._insert_xlsform()
+
+ # Delete the default worksheet if it exists
+ # NOTE: for some reason self.spreadsheet.worksheets() does not contain
+ # the default worksheet (Sheet1). We therefore need to fetch an
+ # updated list here.
+ feed = self.client.get_worksheets_feed(self.spreadsheet)
+ for elem in feed.findall(gspread.ns._ns('entry')):
+ ws = gspread.Worksheet(self.spreadsheet, elem)
+ if ws.title == 'Sheet1':
+ self.client.del_worksheet(ws)
+
+ def export_flattened(self, path, data, username, id_string, filter_query):
+ # Build a flattened CSV
+ from onadata.apps.viewer.pandas_mongo_bridge import CSVDataFrameBuilder
+ csv_builder = CSVDataFrameBuilder(
+ username, id_string, filter_query, self.group_delimiter,
+ self.split_select_multiples, self.binary_select_multiples)
+ csv_builder.export_to(path)
+
+ # Read CSV back in and filter n/a entries
+ rows = []
+ with open(path) as f:
+ reader = csv.reader(f)
+ for row in reader:
+ filtered_rows = [x if x != 'n/a' else '' for x in row]
+ rows.append(filtered_rows)
+
+ # Create a worksheet for flattened data
+ num_rows = len(rows)
+ if not num_rows:
+ return
+ num_cols = len(rows[0])
+ ws = self.spreadsheet.add_worksheet(
+ title=self.FLATTENED_SHEET_TITLE, rows=num_rows, cols=num_cols)
+
+ # Write data row by row
+ for index, values in enumerate(rows, 1):
+ update_row(ws, index, values)
+
+ def export_tabular(self, path, data):
+ # Add worksheets for export.
+ self._create_worksheets()
+
+ # Write the headers
+ self._insert_headers()
+
+ # Write the data
+ self._insert_data(data)
+
+ def _insert_xlsform(self):
+ """Exports XLSForm (e.g. survey, choices) to the sheet."""
+ assert self.client
+ assert self.spreadsheet
+ assert self.xform
+
+ file_path = self.xform.xls.name
+ default_storage = get_storage_class()()
+
+ if file_path == '' or not default_storage.exists(file_path):
+            # No XLSForm file is attached to this form; nothing to copy.
+ return
+
+ with default_storage.open(file_path) as xlsform_file:
+ if file_path.endswith('.csv'):
+ xlsform_io = convert_csv_to_xls(xlsform_file.read())
+ else:
+ xlsform_io = io.BytesIO(xlsform_file.read())
+ # Open XForm and copy sheets to Google Sheets.
+ workbook = xlrd.open_workbook(file_contents=xlsform_io.read(),
+ formatting_info=True)
+ for wksht_nm in workbook.sheet_names():
+ source_ws = workbook.sheet_by_name(wksht_nm)
+ num_cols = source_ws.ncols
+ num_rows = source_ws.nrows
+ destination_ws = self.spreadsheet.add_worksheet(
+ title=wksht_nm, rows=num_rows, cols=num_cols)
+ for row in xrange(num_rows):
+                update_row(destination_ws, row + 1,
+                           [xlrd_format_value(source_ws.cell(row, col))
+                            for col in xrange(num_cols)])
+
+ def _insert_data(self, data):
+ """Writes data rows for each section."""
+ indices = {}
+ survey_name = self.survey.name
+ for index, d in enumerate(data, 1):
+ joined_export = ExportBuilder.dict_to_joined_export(
+ d, index, indices, survey_name)
+ output = ExportBuilder.decode_mongo_encoded_section_names(
+ joined_export)
+ # attach meta fields (index, parent_index, parent_table)
+ # output has keys for every section
+ if survey_name not in output:
+ output[survey_name] = {}
+ output[survey_name][INDEX] = index
+ output[survey_name][PARENT_INDEX] = -1
+ for section in self.sections:
+ # get data for this section and write to xls
+ section_name = section['name']
+ fields = [
+ element['xpath'] for element in
+ section['elements']] + self.EXTRA_FIELDS
+
+ ws = self.worksheets[section_name]
+            # The section might not exist in the output, e.g. when no data
+            # was submitted for a repeat. TODO: add a test for this case.
+ row = output.get(section_name, None)
+ if type(row) == dict:
+ SheetsExportBuilder.write_row(
+ self.pre_process_row(row, section),
+ ws, fields, self.worksheet_titles)
+ elif type(row) == list:
+ for child_row in row:
+ SheetsExportBuilder.write_row(
+ self.pre_process_row(child_row, section),
+ ws, fields, self.worksheet_titles)
+
+ def _insert_headers(self):
+ """Writes headers for each section."""
+ for section in self.sections:
+ section_name = section['name']
+ headers = [
+ element['title'] for element in
+ section['elements']] + self.EXTRA_FIELDS
+ # get the worksheet
+ ws = self.worksheets[section_name]
+ update_row(ws, index=1, values=headers)
+
+ def _create_worksheets(self):
+ """Creates one worksheet per section."""
+ for section in self.sections:
+ section_name = section['name']
+ work_sheet_title = self.get_valid_sheet_name(
+ "_".join(section_name.split("/")),
+ self.worksheet_titles.values())
+ self.worksheet_titles[section_name] = work_sheet_title
+ num_cols = len(section['elements']) + len(self.EXTRA_FIELDS)
+ self.worksheets[section_name] = self.spreadsheet.add_worksheet(
+ title=work_sheet_title, rows=1, cols=num_cols)
+
+ @classmethod
+ def write_row(cls, data, worksheet, fields, worksheet_titles):
+ # update parent_table with the generated sheet's title
+ data[PARENT_TABLE_NAME] = worksheet_titles.get(
+ data.get(PARENT_TABLE_NAME))
+ worksheet.append_row([data.get(f) for f in fields])
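
A usage sketch for SheetsExportBuilder, assuming `token_string` is a serialized token blob (the counterpart of the gdata.gauth.token_from_blob call above) and `xform` and `records` are in scope; the title, path and user names are illustrative:

    from onadata.libs.utils.google_sheets import SheetsExportBuilder

    config = {
        'group_delimiter': '/',
        'split_select_multiples': True,
        'binary_select_multiples': False,
        'spreadsheet_title': 'my_form_2015_06_01_12_00_00',
        'google_token': token_string,
        'flatten_repeated_fields': False,  # one worksheet per section
        'export_xlsform': True,  # also copy the survey/choices sheets
    }
    builder = SheetsExportBuilder(xform, config)
    builder.set_survey(xform.data_dictionary().survey)
    builder.export('/tmp/flattened.csv', records, 'bob', 'my_form', None)
    print 'Spreadsheet available at %s' % builder.url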
diff --git a/onadata/libs/utils/sav_export.py b/onadata/libs/utils/sav_export.py
new file mode 100644
index 000000000..d7f7f81d1
--- /dev/null
+++ b/onadata/libs/utils/sav_export.py
@@ -0,0 +1,104 @@
+from django.core.files.temp import NamedTemporaryFile
+from savReaderWriter import SavWriter
+from zipfile import ZipFile
+
+from onadata.libs.utils.common_tags import INDEX, PARENT_INDEX
+from onadata.libs.utils.export_builder import ExportBuilder
+
+
+class ZippedSavExportBuilder(ExportBuilder):
+
+ def __init__(self, xform, config):
+ super(ZippedSavExportBuilder, self).__init__(xform, config)
+
+ @classmethod
+ def write_row(cls, row, sav_writer, fields):
+        sav_writer.writerow(
+            [ExportBuilder.encode_if_str(row, field, True)
+             for field in fields])
+
+    def export(self, path, data, *args):
+        """Writes one .sav file per section and bundles them into a zip."""
+ sav_defs = {}
+
+ # write headers
+ for section in self.sections:
+ fields = [element['title'] for element in section['elements']]\
+ + self.EXTRA_FIELDS
+ c = 0
+ var_labels = {}
+ var_names = []
+ tmp_k = {}
+ for field in fields:
+ c += 1
+ var_name = 'var%d' % c
+ var_labels[var_name] = field
+ var_names.append(var_name)
+ tmp_k[field] = var_name
+
+ var_types = dict(
+ [(tmp_k[element['title']],
+ 0 if element['type'] in ['decimal', 'int'] else 255)
+ for element in section['elements']]
+ + [(tmp_k[item],
+ 0 if item in ['_id', '_index', '_parent_index'] else 255)
+ for item in self.EXTRA_FIELDS]
+ )
+ sav_file = NamedTemporaryFile(suffix=".sav")
+ sav_writer = SavWriter(sav_file.name, varNames=var_names,
+ varTypes=var_types,
+ varLabels=var_labels, ioUtf8=True)
+ sav_defs[section['name']] = {
+ 'sav_file': sav_file, 'sav_writer': sav_writer}
+
+ index = 1
+ indices = {}
+ survey_name = self.survey.name
+ for d in data:
+ # decode mongo section names
+ joined_export = ExportBuilder.dict_to_joined_export(
+ d, index, indices, survey_name)
+ output = ExportBuilder.decode_mongo_encoded_section_names(
+ joined_export)
+ # attach meta fields (index, parent_index, parent_table)
+ # output has keys for every section
+ if survey_name not in output:
+ output[survey_name] = {}
+ output[survey_name][INDEX] = index
+ output[survey_name][PARENT_INDEX] = -1
+ for section in self.sections:
+ # get data for this section and write to csv
+ section_name = section['name']
+ sav_def = sav_defs[section_name]
+ fields = [
+ element['xpath'] for element in
+ section['elements']] + self.EXTRA_FIELDS
+ sav_writer = sav_def['sav_writer']
+ row = output.get(section_name, None)
+ if type(row) == dict:
+ ZippedSavExportBuilder.write_row(
+ self.pre_process_row(row, section),
+ sav_writer, fields)
+ elif type(row) == list:
+ for child_row in row:
+ ZippedSavExportBuilder.write_row(
+ self.pre_process_row(child_row, section),
+ sav_writer, fields)
+ index += 1
+
+ for section_name, sav_def in sav_defs.iteritems():
+ sav_def['sav_writer'].closeSavFile(
+ sav_def['sav_writer'].fh, mode='wb')
+
+ # write zipfile
+ with ZipFile(path, 'w') as zip_file:
+ for section_name, sav_def in sav_defs.iteritems():
+ sav_file = sav_def['sav_file']
+ sav_file.seek(0)
+ zip_file.write(
+ sav_file.name, "_".join(section_name.split("/")) + ".sav")
+
+ # close files when we are done
+ for section_name, sav_def in sav_defs.iteritems():
+ sav_def['sav_file'].close()
+
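
In savReaderWriter's convention a varType of 0 marks a numeric variable, while a positive value gives the byte width of a string variable; that is why the builder above maps decimal/int elements (and the numeric meta fields) to 0 and everything else to 255. A standalone sketch of the same SavWriter calls, with an illustrative file name and data:

    from savReaderWriter import SavWriter

    var_names = ['var1', 'var2']
    var_types = {'var1': 0, 'var2': 255}          # numeric, 255-byte string
    var_labels = {'var1': 'age', 'var2': 'name'}  # original field titles
    writer = SavWriter('/tmp/example.sav', varNames=var_names,
                       varTypes=var_types, varLabels=var_labels, ioUtf8=True)
    writer.writerow([30, 'Amy'])
    writer.closeSavFile(writer.fh, mode='wb')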
diff --git a/onadata/libs/utils/xls_export.py b/onadata/libs/utils/xls_export.py
new file mode 100644
index 000000000..b78f74688
--- /dev/null
+++ b/onadata/libs/utils/xls_export.py
@@ -0,0 +1,104 @@
+from datetime import datetime
+
+from openpyxl.date_time import SharedDate
+from openpyxl.workbook import Workbook
+
+from onadata.libs.utils.common_tags import (
+    INDEX, PARENT_INDEX, PARENT_TABLE_NAME)
+from onadata.libs.utils.export_builder import ExportBuilder
+
+
+class XlsExportBuilder(ExportBuilder):
+
+ # Configuration options
+ group_delimiter = '/'
+ split_select_multiples = True
+ binary_select_multiples = False
+
+ CONVERT_FUNCS = {
+ 'int': lambda x: int(x),
+ 'decimal': lambda x: float(x),
+ 'date': lambda x: XlsExportBuilder.string_to_date_with_xls_validation(x),
+ 'dateTime': lambda x: datetime.strptime(x[:19], '%Y-%m-%dT%H:%M:%S')
+ }
+
+ def __init__(self, xform, config):
+ super(XlsExportBuilder, self).__init__(xform, config)
+ self.group_delimiter = config['group_delimiter']
+ self.split_select_multiples = config['split_select_multiples']
+ self.binary_select_multiples = config['binary_select_multiples']
+
+ @classmethod
+ def string_to_date_with_xls_validation(cls, date_str):
+ date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
+ try:
+ SharedDate().datetime_to_julian(date_obj)
+ except ValueError:
+ return date_str
+ else:
+ return date_obj
+
+ @classmethod
+ def write_row(cls, data, work_sheet, fields, work_sheet_titles):
+ # update parent_table with the generated sheet's title
+ data[PARENT_TABLE_NAME] = work_sheet_titles.get(
+ data.get(PARENT_TABLE_NAME))
+ work_sheet.append([data.get(f) for f in fields])
+
+    def export(self, path, data, *args):
+        """Writes one worksheet per section to an Excel workbook at path."""
+ wb = Workbook(optimized_write=True)
+ work_sheets = {}
+ # map of section_names to generated_names
+ work_sheet_titles = {}
+ for section in self.sections:
+ section_name = section['name']
+ work_sheet_title = self.get_valid_sheet_name(
+ "_".join(section_name.split("/")), work_sheet_titles.values())
+ work_sheet_titles[section_name] = work_sheet_title
+ work_sheets[section_name] = wb.create_sheet(
+ title=work_sheet_title)
+
+ # write the headers
+ for section in self.sections:
+ section_name = section['name']
+ headers = [
+ element['title'] for element in
+ section['elements']] + self.EXTRA_FIELDS
+ # get the worksheet
+ ws = work_sheets[section_name]
+ ws.append(headers)
+
+ indices = {}
+ survey_name = self.survey.name
+ for index, d in enumerate(data, 1):
+ joined_export = ExportBuilder.dict_to_joined_export(
+ d, index, indices, survey_name)
+ output = ExportBuilder.decode_mongo_encoded_section_names(
+ joined_export)
+ # attach meta fields (index, parent_index, parent_table)
+ # output has keys for every section
+ if survey_name not in output:
+ output[survey_name] = {}
+ output[survey_name][INDEX] = index
+ output[survey_name][PARENT_INDEX] = -1
+ for section in self.sections:
+ # get data for this section and write to xls
+ section_name = section['name']
+ fields = [
+ element['xpath'] for element in
+ section['elements']] + self.EXTRA_FIELDS
+
+ ws = work_sheets[section_name]
+            # The section might not exist in the output, e.g. when no data
+            # was submitted for a repeat. TODO: add a test for this case.
+ row = output.get(section_name, None)
+ if type(row) == dict:
+ XlsExportBuilder.write_row(
+ self.pre_process_row(row, section),
+ ws, fields, work_sheet_titles)
+ elif type(row) == list:
+ for child_row in row:
+ XlsExportBuilder.write_row(
+ self.pre_process_row(child_row, section),
+ ws, fields, work_sheet_titles)
+
+ wb.save(filename=path)
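
Both the SAV and XLS builders iterate the same intermediate structure produced by ExportBuilder.dict_to_joined_export: one entry per section, with repeat rows tied to their parent through the _index, _parent_index and _parent_table_name meta fields. An illustrative, hand-written example of that shape (section, field names and values invented):

    joined_export = {
        'my_form': {
            'name': 'Amy', 'age': 30,
            '_index': 1, '_parent_index': -1,
        },
        'children': [
            {'childs_name': 'Ann', 'childs_age': 7,
             '_index': 1, '_parent_index': 1,
             '_parent_table_name': 'my_form'},
            {'childs_name': 'Joe', 'childs_age': 4,
             '_index': 2, '_parent_index': 1,
             '_parent_table_name': 'my_form'},
        ],
    }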
diff --git a/onadata/settings/common.py b/onadata/settings/common.py
index d45b30cd3..528a50506 100644
--- a/onadata/settings/common.py
+++ b/onadata/settings/common.py
@@ -224,6 +224,7 @@
'onadata.apps.stats',
'onadata.apps.sms_support',
'onadata.libs',
+ 'onadata.apps.sheets_sync',
)
OAUTH2_PROVIDER = {
@@ -397,9 +398,23 @@ def configure_logging(logger, **kwargs):
'PASSWORD': ''
}
-GOOGLE_STEP2_URI = 'http://ona.io/gwelcome'
-GOOGLE_CLIENT_ID = '617113120802.onadata.apps.googleusercontent.com'
-GOOGLE_CLIENT_SECRET = '9reM29qpGFPyI8TBuB54Z4fk'
+GOOGLE_STEP2_URI = 'http://localhost:8001/gwelcome'
+GOOGLE_CLIENT_ID = '896862299299-mv5q1t7qmljc3m4f7l74n0c1nf7pdcqn.apps.googleusercontent.com'
+GOOGLE_CLIENT_SECRET = 'rRYajhQEuQszfx8jW0nfehgT'
+GOOGLE_CLIENT_EMAIL = os.environ.get(
+    'GOOGLE_CLIENT_EMAIL',
+    '896862299299-c651sc4ne7t9v23bk70s7m70b37h9e3k@developer.gserviceaccount.com')
+GOOGLE_CLIENT_PRIVATE_KEY_PATH = os.environ.get(
+    'GOOGLE_CLIENT_PRIVATE_KEY_PATH',
+    os.path.join(PROJECT_ROOT, 'settings/google-private-key.p12'))
+
+def _get_google_client_private_key():
+ try:
+ with open(GOOGLE_CLIENT_PRIVATE_KEY_PATH) as f:
+ return f.read()
+ except EnvironmentError as e:
+ print 'Could not open private key file: %s' % e
+
+GOOGLE_CLIENT_PRIVATE_KEY = _get_google_client_private_key()
+
THUMB_CONF = {
'large': {'size': 1280, 'suffix': '-large'},
diff --git a/requirements/base.pip b/requirements/base.pip
index b94342b43..deab49b42 100644
--- a/requirements/base.pip
+++ b/requirements/base.pip
@@ -8,6 +8,7 @@ django-guardian==1.2.4
django-registration-redux==1.1
django-templated-email==0.4.9
gdata==2.0.18
+gspread==0.2.5
httplib2==0.9
mock==1.0.1
httmock==1.2.2
@@ -60,6 +61,7 @@ django-taggit==0.12.1
 # oauth2 support
django-oauth-toolkit==0.7.2
+oauth2client==1.4.12
# spss
https://bitbucket.org/fomcl/savreaderwriter/downloads/savReaderWriter-3.3.0.zip#egg=savreaderwriter