-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathbene_id_mapping.sql
231 lines (217 loc) · 12.8 KB
/
bene_id_mapping.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
-- bene_id_mapping.sql: Create bene_id mapping table for CMS deidentification
-- Copyright (c) 2017 University of Kansas Medical Center
--select
--'select /*+ PARALLEL(' || table_name ||',12) */ distinct bene_id from ' || table_name || ' union'
--from dba_tables where owner = 'CMS_ID_SAMPLE' and table_name not like 'REF_%' and table_name != 'BENE_ID_MAPPING';
-- Create bene_id_deid for users who where not in previous CMS data.
insert /*+ APPEND */ into bene_id_mapping
select
ubid.bene_id bene_id,
to_char(bene_id_deid_seq.nextval) bene_id_deid,
round(dbms_random.value(-364,0)) date_shift_days,
dob_shift.dob_shift_months
from (
select /*+ PARALLEL(HHA_OCCURRNCE_CODES,12) */ distinct bene_id from HHA_OCCURRNCE_CODES union
select /*+ PARALLEL(PDE,12) */ distinct bene_id from PDE union
select /*+ PARALLEL(BCARRIER_CLAIMS_K,12) */ distinct bene_id from BCARRIER_CLAIMS_K union
select /*+ PARALLEL(HHA_VALUE_CODES,12) */ distinct bene_id from HHA_VALUE_CODES union
select /*+ PARALLEL(HOSPICE_OCCURRNCE_CODES,12) */ distinct bene_id from HOSPICE_OCCURRNCE_CODES union
select /*+ PARALLEL(HHA_CONDITION_CODES,12) */ distinct bene_id from HHA_CONDITION_CODES union
select /*+ PARALLEL(OUTPATIENT_REVENUE_CENTER_K,12) */ distinct bene_id from OUTPATIENT_REVENUE_CENTER_K union
select /*+ PARALLEL(HHA_SPAN_CODES,12) */ distinct bene_id from HHA_SPAN_CODES union
select /*+ PARALLEL(HOSPICE_REVENUE_CENTER_K,12) */ distinct bene_id from HOSPICE_REVENUE_CENTER_K union
select /*+ PARALLEL(HHA_BASE_CLAIMS_K,12) */ distinct bene_id from HHA_BASE_CLAIMS_K union
select /*+ PARALLEL(BCARRIER_LINE_K,12) */ distinct bene_id from BCARRIER_LINE_K union
select /*+ PARALLEL(HOSPICE_SPAN_CODES,12) */ distinct bene_id from HOSPICE_SPAN_CODES union
select /*+ PARALLEL(HOSPICE_CONDITION_CODES,12) */ distinct bene_id from HOSPICE_CONDITION_CODES union
select /*+ PARALLEL(HHA_REVENUE_CENTER_K,12) */ distinct bene_id from HHA_REVENUE_CENTER_K union
select /*+ PARALLEL(MEDPAR_ALL,12) */ distinct bene_id from MEDPAR_ALL union
select /*+ PARALLEL(HOSPICE_VALUE_CODES,12) */ distinct bene_id from HOSPICE_VALUE_CODES union
select /*+ PARALLEL(MBSF_ABCD_SUMMARY,12) */ distinct bene_id from MBSF_ABCD_SUMMARY union
select /*+ PARALLEL(OUTPATIENT_SPAN_CODES,12) */ distinct bene_id from OUTPATIENT_SPAN_CODES union
select /*+ PARALLEL(HOSPICE_BASE_CLAIMS_K,12) */ distinct bene_id from HOSPICE_BASE_CLAIMS_K union
select /*+ PARALLEL(OUTPATIENT_BASE_CLAIMS_K,12) */ distinct bene_id from OUTPATIENT_BASE_CLAIMS_K union
select /*+ PARALLEL(OUTPATIENT_OCCURRNCE_CODES,12) */ distinct bene_id from OUTPATIENT_OCCURRNCE_CODES union
select /*+ PARALLEL(OUTPATIENT_VALUE_CODES,12) */ distinct bene_id from OUTPATIENT_VALUE_CODES union
select /*+ PARALLEL(OUTPATIENT_CONDITION_CODES,12) */ distinct bene_id from OUTPATIENT_CONDITION_CODES union
select /*+ PARALLEL(BCARRIER_DEMO_CODES,12) */ distinct bene_id from BCARRIER_DEMO_CODES union
select /*+ PARALLEL(HHA_DEMO_CODES,12) */ distinct bene_id from HHA_DEMO_CODES union
select /*+ PARALLEL(HOSPICE_DEMO_CODES,12) */ distinct bene_id from HOSPICE_DEMO_CODES union
select /*+ PARALLEL(MAXDATA_IP,12) */ distinct bene_id from MAXDATA_IP union
select /*+ PARALLEL(MAXDATA_LT,12) */ distinct bene_id from MAXDATA_LT union
select /*+ PARALLEL(MAXDATA_OT,12) */ distinct bene_id from MAXDATA_OT union
select /*+ PARALLEL(MAXDATA_PS,12) */ distinct bene_id from MAXDATA_PS union
select /*+ PARALLEL(MAXDATA_RX,12) */ distinct bene_id from MAXDATA_RX union
select /*+ PARALLEL(OUTPATIENT_DEMO_CODES,12) */ distinct bene_id from OUTPATIENT_DEMO_CODES
) ubid
left join dob_shift on dob_shift.bene_id = ubid.bene_id
left join "&&prev_cms_id_schema"."&&bene_id_map_prev_yrs_cumu" prev_ubid on prev_ubid.bene_id = ubid.bene_id
where prev_ubid.bene_id is null;
commit;
-- Reuse bene_id_deid for users who where in previous CMS data.
insert /*+ APPEND */ into bene_id_mapping
select
ubid.bene_id bene_id,
prev_ubid.bene_id_deid bene_id_deid,
prev_ubid.date_shift_days date_shift_days,
prev_ubid.dob_shift_months dob_shift_months
from (
select /*+ PARALLEL(HHA_OCCURRNCE_CODES,12) */ distinct bene_id from HHA_OCCURRNCE_CODES union
select /*+ PARALLEL(PDE,12) */ distinct bene_id from PDE union
select /*+ PARALLEL(BCARRIER_CLAIMS_K,12) */ distinct bene_id from BCARRIER_CLAIMS_K union
select /*+ PARALLEL(HHA_VALUE_CODES,12) */ distinct bene_id from HHA_VALUE_CODES union
select /*+ PARALLEL(HOSPICE_OCCURRNCE_CODES,12) */ distinct bene_id from HOSPICE_OCCURRNCE_CODES union
select /*+ PARALLEL(HHA_CONDITION_CODES,12) */ distinct bene_id from HHA_CONDITION_CODES union
select /*+ PARALLEL(OUTPATIENT_REVENUE_CENTER_K,12) */ distinct bene_id from OUTPATIENT_REVENUE_CENTER_K union
select /*+ PARALLEL(HHA_SPAN_CODES,12) */ distinct bene_id from HHA_SPAN_CODES union
select /*+ PARALLEL(HOSPICE_REVENUE_CENTER_K,12) */ distinct bene_id from HOSPICE_REVENUE_CENTER_K union
select /*+ PARALLEL(HHA_BASE_CLAIMS_K,12) */ distinct bene_id from HHA_BASE_CLAIMS_K union
select /*+ PARALLEL(BCARRIER_LINE_K,12) */ distinct bene_id from BCARRIER_LINE_K union
select /*+ PARALLEL(HOSPICE_SPAN_CODES,12) */ distinct bene_id from HOSPICE_SPAN_CODES union
select /*+ PARALLEL(HOSPICE_CONDITION_CODES,12) */ distinct bene_id from HOSPICE_CONDITION_CODES union
select /*+ PARALLEL(HHA_REVENUE_CENTER_K,12) */ distinct bene_id from HHA_REVENUE_CENTER_K union
select /*+ PARALLEL(MEDPAR_ALL,12) */ distinct bene_id from MEDPAR_ALL union
select /*+ PARALLEL(HOSPICE_VALUE_CODES,12) */ distinct bene_id from HOSPICE_VALUE_CODES union
select /*+ PARALLEL(MBSF_ABCD_SUMMARY,12) */ distinct bene_id from MBSF_ABCD_SUMMARY union
select /*+ PARALLEL(OUTPATIENT_SPAN_CODES,12) */ distinct bene_id from OUTPATIENT_SPAN_CODES union
select /*+ PARALLEL(HOSPICE_BASE_CLAIMS_K,12) */ distinct bene_id from HOSPICE_BASE_CLAIMS_K union
select /*+ PARALLEL(OUTPATIENT_BASE_CLAIMS_K,12) */ distinct bene_id from OUTPATIENT_BASE_CLAIMS_K union
select /*+ PARALLEL(OUTPATIENT_OCCURRNCE_CODES,12) */ distinct bene_id from OUTPATIENT_OCCURRNCE_CODES union
select /*+ PARALLEL(OUTPATIENT_VALUE_CODES,12) */ distinct bene_id from OUTPATIENT_VALUE_CODES union
select /*+ PARALLEL(OUTPATIENT_CONDITION_CODES,12) */ distinct bene_id from OUTPATIENT_CONDITION_CODES union
select /*+ PARALLEL(BCARRIER_DEMO_CODES,12) */ distinct bene_id from BCARRIER_DEMO_CODES union
select /*+ PARALLEL(HHA_DEMO_CODES,12) */ distinct bene_id from HHA_DEMO_CODES union
select /*+ PARALLEL(HOSPICE_DEMO_CODES,12) */ distinct bene_id from HOSPICE_DEMO_CODES union
select /*+ PARALLEL(MAXDATA_IP,12) */ distinct bene_id from MAXDATA_IP union
select /*+ PARALLEL(MAXDATA_LT,12) */ distinct bene_id from MAXDATA_LT union
select /*+ PARALLEL(MAXDATA_OT,12) */ distinct bene_id from MAXDATA_OT union
select /*+ PARALLEL(MAXDATA_PS,12) */ distinct bene_id from MAXDATA_PS union
select /*+ PARALLEL(MAXDATA_RX,12) */ distinct bene_id from MAXDATA_RX union
select /*+ PARALLEL(OUTPATIENT_DEMO_CODES,12) */ distinct bene_id from OUTPATIENT_DEMO_CODES
) ubid
left join dob_shift on dob_shift.bene_id = ubid.bene_id
left join "&&prev_cms_id_schema"."&&bene_id_map_prev_yrs_cumu" prev_ubid on prev_ubid.bene_id = ubid.bene_id
where prev_ubid.bene_id is not null;
commit;
create unique index bene_id_mapping_bid_idx on bene_id_mapping (bene_id);
create unique index bene_id_mapping_deidbid_idx on bene_id_mapping (bene_id_deid);
-- First, insert people without bene_ids (who therefore need a date shift)
insert /*+ APPEND */ into msis_person
select
umid.bene_id bene_id,
umid.msis_id msis_id,
umid.state_cd state_cd,
CASE
WHEN prev_msis.msis_id is not null and prev_msis.state_cd is not null
THEN prev_msis.date_shift_days
ELSE round(dbms_random.value(-364,0))
END date_shift_days,
CASE
WHEN prev_msis.msis_id is not null and prev_msis.state_cd is not null
THEN prev_msis.dob_shift_months
ELSE dob_shift.dob_shift_months
END dob_shift_months
from (
-- The Personal Summary File contains one record for every individual enrolled
-- for at least one day during the year. So, it's not actually necessary to
-- select from the union of all the Medicaid tables with the production data.
-- However, in order for the process to work for the sample data, we need to
-- look at all the tables.
-- https://www.resdac.org/cms-data/files/max-ps
select /*+ PARALLEL(MAXDATA_PS,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_PS where bene_id is null
union
select /*+ PARALLEL(MAXDATA_IP,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_IP where bene_id is null
union
select /*+ PARALLEL(MAXDATA_LT,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_LT where bene_id is null
union
select /*+ PARALLEL(MAXDATA_OT,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_OT where bene_id is null
union
select /*+ PARALLEL(MAXDATA_RX,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_RX where bene_id is null
) umid
left join dob_shift on dob_shift.msis_id = umid.msis_id
and dob_shift.state_cd = umid.state_cd
left join "&&prev_cms_id_schema"."&&msis_person_prev_yrs_cumu" prev_msis
on prev_msis.msis_id = umid.msis_id and prev_msis.state_cd = umid.state_cd
;
commit;
-- Next, get everyone who has a bene_id (who will get a date/dob shift per bene_id)
insert /*+ APPEND */ into msis_person
select
umid.bene_id bene_id,
umid.msis_id msis_id,
umid.state_cd state_cd,
null date_shift_days,
null dob_shift_months
from (
select /*+ PARALLEL(MAXDATA_PS,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_PS where bene_id is not null
union
select /*+ PARALLEL(MAXDATA_IP,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_IP where bene_id is not null
union
select /*+ PARALLEL(MAXDATA_LT,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_LT where bene_id is not null
union
select /*+ PARALLEL(MAXDATA_OT,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_OT where bene_id is not null
union
select /*+ PARALLEL(MAXDATA_RX,12) */ distinct bene_id, msis_id, state_cd from MAXDATA_RX where bene_id is not null
) umid;
commit;
create unique index msis_person_mid_st_idx on msis_person (msis_id, state_cd);
insert /*+ APPEND */ into msis_id_mapping
select
umid.msis_id msis_id,
coalesce(prev_mmap.msis_id_deid, to_char(msis_id_deid_seq.nextval)) msis_id_deid
from (
select /*+ PARALLEL(msis_person,12) */ distinct msis_id from msis_person
) umid
left join "&&prev_cms_id_schema"."&&msis_id_mapping_prev_yr_cumu" prev_mmap
on prev_mmap.msis_id = umid.msis_id
;
commit;
create unique index msis_id_mapping_mid_idx on msis_id_mapping (msis_id);
create unique index msis_id_mapping_deidmid_idx on msis_id_mapping (msis_id_deid);
-- Build the i2b2-shaped patient mapping in the DEID schema
whenever sqlerror continue;
drop table pmap_parts;
whenever sqlerror exit;
create table pmap_parts (
BENE_ID_DEID VARCHAR2(15),
MSIS_ID_DEID VARCHAR2(32),
STATE_CD VARCHAR2(2),
PATIENT_NUM number(38,0)
);
alter table pmap_parts parallel (degree 12);
insert /*+ APPEND */ into pmap_parts
select /*+ PARALLEL(bene_id_mapping,12) PARALLEL(msis_person,12) PARALLEL(msis_id_mapping,12) */
bmap.bene_id_deid, mmap.msis_id_deid, mper.state_cd,
coalesce(to_number(bmap.bene_id_deid), bene_id_deid_seq.nextval) patient_num
from bene_id_mapping bmap
left join msis_person mper on mper.bene_id = bmap.bene_id
left join msis_id_mapping mmap on mmap.msis_id = mper.msis_id
;
commit;
-- Insert bene_id_deid mappings
-- Distinct because one bene_id may be linked to multiple msis_id + state_cd and
-- therefore have multiple rows in the pmap_parts table.
insert /*+ APPEND */ into "&&deid_schema".patient_mapping
select /*+ PARALLEL(pmap_parts,12) */ distinct
bene_id_deid patient_ide, bene_cd patient_ide_source, bene_id_deid patient_num,
'A' patient_ide_status, '&&project_id' project_id, sysdate upload_date, sysdate update_date,
sysdate download_date, sysdate import_date, '&&cms_source_cd' sourcesystem_cd, &&upload_id upload_id
from pmap_parts
cross join cms_key_sources
where bene_id_deid is not null
;
commit;
------------------------------------------------------------------------------------------
-- Insert msis_id + state_cd mappings
insert /*+ APPEND */ into "&&deid_schema".patient_mapping
select /*+ PARALLEL(pmap_parts,12) */
fmt_msis_pat_ide(to_char(msis_id_deid), state_cd) patient_ide,
msis_cd patient_ide_source,
patient_num,
'A' patient_ide_status, '&&project_id' project_id, sysdate upload_date, sysdate update_date,
sysdate download_date, sysdate import_date, '&&cms_source_cd' sourcesystem_cd, &&upload_id upload_id
from pmap_parts
cross join cms_key_sources cks
where msis_id_deid is not null and state_cd is not null
;
commit;