-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathaggregated_migrated.json.tmpl
189 lines (188 loc) · 8.09 KB
/
aggregated_migrated.json.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
{
"name": "aggregated_with_flags",
"database": "declarations_translated",
"loader": {
"type": "nacp",
"input_dir": "{datadir}/import/nacp",
"corrected_file": "${NACP_DECLARATIONS_PATH}/corrected.csv",
"state_file": "{datadir}/import/laststate_migrated"
},
"runner": {
"type": "chakra",
"sequential": true,
"design_documents": [
{
"name": "meta",
"views": ["views/meta.es6"]
},
{
"name": "aggregated",
"views": [
"views/aggregated/step_3_estate.es6", "views/aggregated/step_6_vehicles.es6",
"views/aggregated/step_11_incomes.es6", "views/aggregated/step_12_assets.es6"
]
},
{
"name": "red_flags",
"views": ["views/red_flags.es6", "views/change_form_red_flags.es6"]
}
]
},
"exporter": {
"type": "csv",
"state_file": "{datadir}/import/laststate_migrated",
"mappings": [
{
"view": "meta.meta",
"output": "{datadir}/export/meta.csv",
"columns": ["id", "link", "name", "year", "name_post", "family", "organization_group",
"region", "user_declarant_id", "corruption_affected", "post_category",
"post_category_extendedstatus", "post_type", "public_person", "responsible_position",
"date", "doc_type"]
},
{
"view": "aggregated.step_3_estate",
"output": "{datadir}/export/step_3_estate_agg.csv",
"columns": [
"id", "estate.declarant_land", "estate.declarant_other", "estate.family_land",
"estate.family_other", "estate.total_land", "estate.total_other", "estate.has_hidden",
"estate.has_foreign", "estate.family_land_ratio", "estate.family_other_ratio"
]
},
{
"view": "aggregated.step_6_vehicles",
"output": "{datadir}/export/step_6_vehicles_agg.csv",
"columns": [
"id", "vehicles.declarant_cost", "vehicles.family_cost", "vehicles.total_cost",
"vehicles.max_year", "vehicles.has_hidden", "vehicles.all_names", "vehicles.any"
]
},
{
"view": "aggregated.step_11_incomes",
"output": "{datadir}/export/step_11_incomes_agg.csv",
"columns": [
"id", "incomes.declarant", "incomes.family", "incomes.total", "incomes.prize", "incomes.has_hidden",
"incomes.has_foreign", "incomes.family_ratio"
]
},
{
"view": "aggregated.step_12_assets",
"output": "{datadir}/export/step_12_assets_agg.csv",
"columns": [
"id", "assets.declarant", "assets.family", "assets.total", "assets.lent", "assets.has_hidden",
"assets.has_foreign", "assets.family_ratio"
]
},
{
"view": "red_flags.red_flags",
"output": "{datadir}/export/red_flags.csv",
"columns": [
"id", "assets_to_income_flag", "income_presents_to_total_flag", "expenses_to_inc_and_assets_flag",
"liabilities_to_inc_and_assets_flag", "cash_flag", "garage_wo_car_flag", "house_no_land_flag",
"lux_cars_flag", "lux_cars_flag_v2", "vehicle_purch_no_cost_flag", "estate_purch_no_cost_flag",
"estate_has_hidden_cost", "corprights_abroad_flag", "bank_accounts_abroad_flag", "income_prize_fact",
"assets_lend_3rd_parties", "cash_flag_500k", "income_presents_to_total_flag_50", "hidden_in_family_flag",
"expenses.total", "liabilities.total", "assets.cash.total", "incomes.presents.all", "has_aircraft_flag",
"has_major_real_estate", "has_foreign_real_estate", "family_member_did_not_provide_information",
"has_huge_prize", "has_bo_abroad", "has_non_bank_liabilities"
]
},
{
"view": "red_flags.change_form_red_flags",
"output": "{datadir}/export/change_form_red_flags.csv",
"columns": [
"id", "untimely_submission"
]
}
]
},
"merger": {
"type": "csv",
"field": "id",
"how": "left",
"type_overrides": {
"year": "Int64Dtype",
"user_declarant_id": "Int64Dtype",
"assets_lend_3rd_parties": "bool",
"assets_to_income_flag": "bool",
"bank_accounts_abroad_flag": "bool",
"cash_flag": "bool",
"cash_flag_500k": "bool",
"corprights_abroad_flag": "bool",
"estate_has_hidden_cost": "bool",
"estate_purch_no_cost_flag": "bool",
"expenses_to_inc_and_assets_flag": "bool",
"family_member_did_not_provide_information": "bool",
"garage_wo_car_flag": "bool",
"has_aircraft_flag": "bool",
"has_bo_abroad": "bool",
"has_foreign_real_estate": "bool",
"has_huge_prize": "bool",
"has_major_real_estate": "bool",
"has_non_bank_liabilities": "bool",
"house_no_land_flag": "bool",
"income_presents_to_total_flag": "bool",
"income_presents_to_total_flag_50": "bool",
"income_prize_fact": "bool",
"liabilities_to_inc_and_assets_flag": "bool",
"lux_cars_flag": "bool",
"lux_cars_flag_v2": "bool",
"vehicle_purch_no_cost_flag": "bool",
"hidden_in_family_flag": "bool",
"family_member_did_not_provide_information_flag": "bool",
"untimely_submission": "bool",
"assets.total": "float64",
"vehicles.total_cost": "float64",
"incomes.total": "float64",
"estate.total_other": "float64",
"estate.total_land": "float64"
},
"nan_replacements": {
"types": {
"int64": 0,
"float64": 0.0,
"bool": "False",
"boolean": false,
"object": ""
},
"columns": {
"region": "!не визначено",
"family": "False",
"incomes.has_hidden": "False",
"incomes.has_foreign": "False",
"assets.has_hidden": "False",
"assets.has_foreign": "False",
"estate.has_hidden": "False",
"estate.has_foreign": "False",
"vehicles.max_year": "!немає авто",
"vehicles.has_hidden": "False",
"vehicles.any": "False",
"year": -1,
"user_declarant_id": -1
}
},
"output": "{datadir}/export/aggregated_with_flags.csv",
"inputs": [
"{datadir}/export/meta.csv", "{datadir}/export/red_flags.csv", "{datadir}/export/change_form_red_flags.csv",
"{datadir}/export/step_3_estate_agg.csv", "{datadir}/export/step_6_vehicles_agg.csv",
"{datadir}/export/step_11_incomes_agg.csv", "{datadir}/export/step_12_assets_agg.csv"
],
"postprocess": ["{datadir}/filters/outlier_agg_filter.py", "{datadir}/filters/agg_postprocess.py"],
"only_years": []
},
"pump": {
"type": "csv_to_elasticsearch",
"index": "nacp_declarations_new",
"doc_type": "nacp_declaration_doctype",
"match_field": "id",
"container_field": "aggregated",
"input": "{datadir}/export/aggregated_with_flags.csv",
"state_file": "{datadir}/import/laststate_migrated",
"filter_values": {
"year": -1,
"user_declarant_id": -1
}
}
}