This repository has been archived by the owner on Dec 8, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
update.py
78 lines (72 loc) · 2.46 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from dataflows import Flow, load, dump_to_path, dump_to_zip, printer, add_metadata
from dataflows import sort_rows, filter_rows, find_replace, delete_fields, set_type, validate, unpivot
# Published Google Sheets endpoint that exports one tab as CSV.
# The URL ends in "gid=", so a tab id must be appended to it.
od19_base = (
    'https://docs.google.com/spreadsheets/d/e/'
    '2PACX-1vTtoCwqVV9EBhHMcmmCI5FxIELLTT5IdEVrKIMImWmfcq4iE1xOW-_90Rs-dt3JCkb-1DxjNJRCjy40'
    '/pub?single=true&output=csv&gid='
)

# Tab ids (gid values) for the two sheets this script loads.
od19_feedback, od19_analysis = '587615265', '218970622'
def not_empty_groupcol(rows):
    """Yield rows, dropping blank and repeated topic groups.

    Rows that do not contain the ``Alle "Bedürfnisse"`` key are passed
    through unchanged. Rows that do contain it are yielded only when:

    * the stripped group value is non-empty,
    * ``Anzahl Auflistung (Zahl)`` is not ``None``, and
    * the stripped group value has not already been yielded.

    Args:
        rows: iterable of dict-like rows.

    Yields:
        The rows that pass the filter, in their original order.
    """
    group_col = 'Alle "Bedürfnisse"'
    count_col = 'Anzahl Auflistung (Zahl)'
    # A set gives O(1) membership tests; only membership is needed here.
    seen = set()
    for row in rows:
        if group_col not in row:
            yield row
            continue
        # Guard against a None cell: calling .strip() on it directly would
        # raise AttributeError, so treat it the same as an empty string.
        group = (row[group_col] or '').strip()
        # The count column is only looked up when the group is non-blank.
        if not group or row[count_col] is None:
            continue
        if group not in seen:
            seen.add(group)
            yield row
def conference_csv():
    """Build and run the Opendata.ch/2019 data-package pipeline.

    The flow performs these steps in order:

    1. Loads two CSV exports (``od19_base`` plus a tab id) as the
       resources ``feedback`` and ``analysis``.
    2. On ``analysis``, sets fields matching ``Anzahl.*`` to integer and
       deletes the helper columns listed below.
    3. Applies ``not_empty_groupcol`` as a row step.
    4. Attaches package metadata, including a bar-chart view over the
       ``analysis`` resource.
    5. Prints, validates and writes the result to ``data/opendatach19``.

    Returns:
        None. The result of ``flow.process()`` is discarded.
    """
    flow = Flow(
        # Load inputs
        load(
            od19_base + od19_feedback,
            name='feedback',
            format='csv',
        ),
        load(
            od19_base + od19_analysis,
            name='analysis',
            format='csv',
        ),
        # Process them
        set_type("Anzahl.*", type='integer', resources='analysis'),
        delete_fields(
            [
                "Anzahl Auflistung",
                ".*\\(Formel\\)",
                ".*Duplikate",
            ],
            resources='analysis',
        ),
        not_empty_groupcol,
        # Save the results
        add_metadata(
            name='opendatach19',
            title='''Opendata.ch/2019 Forum''',
            licenses=[{
                "name": "ODC-PDDL-1.0",
                "path": "http://opendatacommons.org/licenses/pddl/",
                "title": "Open Data Commons Public Domain Dedication and License v1.0",
            }],
            maintainers=[{
                "name": "Oleg Lavrovsky",
                "web": "https://datalets.ch/",
            }],
            views=[{
                "name": "Groups",
                "resources": ["analysis"],
                "spec": {
                    # Must be the real column name, inner double quotes
                    # included. The CSV-style escaping "Alle ""Bedürfnisse"""
                    # is parsed by Python as adjacent literals and yields
                    # 'Alle Bedürfnisse', which is not a column name.
                    "group": 'Alle "Bedürfnisse"',
                    "series": [
                        "Anzahl Auflistung (Zahl)",
                    ],
                    "type": "bar",
                },
                "specType": "simple",
                "title": "Topic counts",
            }],
        ),
        printer(),
        validate(),
        dump_to_path('data/opendatach19'),
    )
    flow.process()
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    conference_csv()