-
Notifications
You must be signed in to change notification settings - Fork 1
/
study_data_dist.py
35 lines (29 loc) · 1.71 KB
/
study_data_dist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import numpy as np
import pandas as pd
# Short set name -> base name of the corresponding CSV file; the '.csv'
# extension is appended when the file is read.
datasets = {
    "train": "MAFAT RADAR Challenge - Training Set V1",
    "train_spliced": "MAFAT RADAR Challenge - Training Set V1 - Spliced",
    "experiment": "MAFAT RADAR Challenge - Auxiliary Experiment Set V2",
    "synthetic": "MAFAT RADAR Challenge - Auxiliary Synthetic Set V2",
    "test": "MAFAT RADAR Challenge - Public Test Set V1",
    "empty": "MAFAT RADAR Challenge - Auxiliary Background(empty) Set V1",
}
# Folder that holds one '<file_name>.csv' per entry in `datasets`.
data_folder_path = 'data'

# Read every set except 'test', tag each row with the set it came from and a
# constant val of 1, then stack the per-set frames into one table.
df_list = []
for set_name, file_name in datasets.items():
    if set_name != 'test':
        csv_path = os.path.join(data_folder_path, file_name + '.csv')
        df = pd.read_csv(csv_path).assign(set_name=set_name, val=1)
        df_list.append(df)

concatenated_df = pd.concat(df_list)
# Create pivot tables.
# Every row of concatenated_df carries val == 1, so summing 'val' per cell
# gives the number of rows for that index/column combination. No fill_value is
# passed, so combinations without any rows are left as missing values.
def _count_pivot(frame, index, columns):
    """Return a pivot table of summed 'val' over *frame* for the given *index* and *columns*."""
    # The string 'sum' selects the same grouped sum that np.sum was mapped to,
    # without the FutureWarning that newer pandas raises for NumPy callables.
    return pd.pivot_table(frame, values='val', index=index, columns=columns, aggfunc='sum')


pt_target_type_vs_set_name = _count_pivot(
    concatenated_df, 'target_type', 'set_name')
pt_target_type_vs_geolocation_type_and_id = _count_pivot(
    concatenated_df, 'target_type', ['geolocation_type', 'geolocation_id'])
pt_target_type_vs_geolocation_type = _count_pivot(
    concatenated_df, 'target_type', 'geolocation_type')
pt_target_type_vs_sensor_id = _count_pivot(
    concatenated_df, 'target_type', 'sensor_id')
pt_geolocation_vs_sensor_id = _count_pivot(
    concatenated_df, 'geolocation_type', 'sensor_id')
pt_geolocation_and_target_type_vs_sensor_id = _count_pivot(
    concatenated_df, ['geolocation_type', 'target_type'], 'sensor_id')

# NOTE(review): nothing visible reads x; it looks like a no-op anchor (e.g. a
# place to set a debugger breakpoint and inspect the tables) - confirm before removing.
x = 0