-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
datasites.py
132 lines (102 loc) · 48.1 KB
/
datasites.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python
# coding: utf-8
import syft as sy
from datasets import load_data, generate_mock
from datasets import NAMES, CLEVELAND, HUNGARY, SWITZERLAND, LONG_BEACH
from threading import current_thread
from time import sleep
from typing import Optional
# One local port per datasite, assigned consecutively from 54879 in the
# order in which the names appear in NAMES.
DATASITE_PORTS = {name: (54879 + i) for i, name in enumerate(NAMES)}

# Base URL of each datasite on localhost, derived from its port above.
DATASITE_URLS = {
    name: f"http://localhost:{port}" for name, port in DATASITE_PORTS.items()
}

# Human-readable institute name for each datasite; used in the dataset
# summary/description and in the datasite welcome message.
INSTITUTE_FULLNAMES = {
    CLEVELAND: "Cleveland Clinic, Ohio (USA)",
    SWITZERLAND: "University Hospitals of Zurich and Basel (Switzerland)",
    HUNGARY: "Hungarian Institute of Cardiology, Budapest (Hungary)",
    LONG_BEACH: "Veteran Administration Medical Center, Long Beach, California (USA)",
}
def create_syft_dataset(name: str) -> Optional[sy.Dataset]:
    """Create a new syft.Dataset for the selected datasite/dataset.

    Args:
        name: Key of the datasite. It is passed to ``load_data`` and used to
            look up the institute's full name in ``INSTITUTE_FULLNAMES``.

    Returns:
        The dataset with a single "Heart Study Data" asset attached, or
        ``None`` if ``load_data`` returns ``None`` for ``name`` (i.e. the
        matching data cannot be found or loaded).

    Raises:
        KeyError: If data was loaded but ``name`` has no entry in
            ``INSTITUTE_FULLNAMES``.
    """
    data = load_data(name=name)
    if data is None:
        return None

    full_name = INSTITUTE_FULLNAMES[name]
    # The description embeds summary statistics computed on the loaded data
    # with the demographic columns ("age", "sex") dropped.
    dataset = sy.Dataset(
        name="Heart Disease Dataset",
        summary=f"Heart Disease Dataset from {full_name}",
        description=f"""
## Heart Disease Dataset
**Institute**: {full_name}
**Descriptive Statistics** (demographics removed):
{data.drop(["age", "sex"], axis=1).describe().to_markdown()}
**Demographics**:
- `age`: Integer, _age in years_
- `sex`: Categorical, `1 = male; 0 = female`
**Outcome**:
- `num`: Integer, _Angiographic Disease Status_
- Diagnosis: Absence (`num = 0`); Presence (`num > 0`)
""",
    )  # type: ignore

    # The mock seed is the length of the datasite name, so it is fixed for a
    # given name (names of equal length share the same seed).
    dataset.add_asset(
        sy.Asset(
            name="Heart Study Data",
            data=data,
            mock=generate_mock(data=data, seed=len(name)),
        )
    )
    return dataset
def _get_welcome_message(name: str, full_name: str) -> str:
    """Build the markdown welcome text shown by a datasite.

    Args:
        name: Short datasite name, interpolated into the heading.
        full_name: Full institute name, shown on the "Institute" line.

    Returns:
        A markdown string that starts and ends with a newline and contains
        the logo tag, the heading, the institute and the deployment type.
    """
    # NOTE(review): the logo `src` is empty in this view of the file --
    # confirm whether an embedded image (data URI) is expected here.
    logo_tag = '<img src="" alt="Logo" style="width:48px;height:48px;padding:3px;">'
    body_lines = [
        logo_tag,
        f"## Welcome to the {name} Datasite",
        f"**Institute**: {full_name}",
        "**Deployment Type**: Local",
    ]
    # Leading and trailing empty entries reproduce the newline right after the
    # opening and right before the closing of the message.
    return "\n".join(["", *body_lines, ""])
def spawn_server(sid: int):
    """Launch a new PySyft Datasite instance and prepare it for the tutorial.

    The datasite is launched on its dedicated port with ``reset=True``, guest
    signup is enabled, the welcome message is customised, a researcher user
    is created and, when available, the datasite's dataset is uploaded.

    Args:
        sid: Index of the datasite to launch. It is taken modulo
            ``len(NAMES)``, so any integer maps onto one of the known names.

    Returns:
        A ``(data_site, client)`` tuple: the object returned by
        ``sy.orchestra.launch`` and the client obtained by logging into it.
    """
    name = NAMES[sid % len(NAMES)]
    data_site = sy.orchestra.launch(
        name=name,
        port=DATASITE_PORTS[name],
        reset=True,
        n_consumers=3,
        create_producer=True,
    )
    # NOTE(review): the e-mail addresses and the researcher password below
    # look redacted in this view of the file -- confirm the real values.
    client = data_site.login(email="[email protected]", password="changethis")

    # Customise Settings
    client.settings.allow_guest_signup(True)
    client.settings.welcome_customize(
        markdown=_get_welcome_message(name=name, full_name=INSTITUTE_FULLNAMES[name])
    )
    client.users.create(
        email="[email protected]",
        password="****",
        password_verify="****",
        name="OpenMined Researcher",
        institution="OpenMined",
        website="https://openmined.org",
    )

    # The dataset may be missing (create_syft_dataset returns None when the
    # data cannot be loaded); the datasite is still started in that case.
    ds = create_syft_dataset(name)
    if ds is not None:
        client.upload_dataset(ds)

    print(f"Datasite {name} is up and running: {data_site.url}:{data_site.port}")
    return data_site, client
def check_and_approve_incoming_requests(client):
    """Poll the datasite and auto-approve every request not yet approved.

    The function loops until the current thread reports that it has been
    stopped, so it must run inside a thread object exposing a ``stopped()``
    method. On each pass it reads ``client.requests``, approves (including
    nested requests) every entry whose status value is not 2, then sleeps
    for one second before polling again.

    Note: this is only intended for the tutorial, as a demonstration of the
    PoC example. For further information, please check out the official
    documentation of the Requests API:
    https://docs.openmined.org/en/latest/components/requests-api.html

    Args:
        client: Logged-in datasite client exposing a ``requests`` collection.
    """
    approved_status = 2  # 2 == APPROVED
    worker = current_thread()
    while True:
        if worker.stopped():  # type: ignore
            break
        for pending in client.requests:
            if pending.status.value == approved_status:
                continue
            pending.approve(approve_nested=True)
        sleep(1)