Commit af4d6bd

feat: copy progress + cancel mid copy

1 parent 79d4760 commit af4d6bd

File tree

12 files changed, +597 −130 lines

swift_browser_ui/ui/api.py

Lines changed: 81 additions & 0 deletions
@@ -972,6 +972,87 @@ async def swift_replicate_container(
     )
 
 
+async def swift_replicate_status(request: aiohttp.web.Request) -> aiohttp.web.Response:
+    """Proxy replication status from upload-runner back to UI."""
+    session = await aiohttp_session.get_session(request)
+    request.app["Log"].info(
+        "API call for replication status from "
+        f"{request.remote}, sess: {session} :: {time.ctime()}"
+    )
+
+    project = request.query.get("project") or request.query.get("runner_project")
+    if not project:
+        raise aiohttp.web.HTTPBadRequest(reason="Missing ?project=<projectId>")
+
+    runner_id = await open_upload_runner_session(request, project=project)
+
+    job_id = request.match_info["job_id"]
+    path = f"/replicate/status/{job_id}"
+    signature = await sign(3600, path)
+
+    client = request.app["api_client"]
+    url = f"{setd['upload_internal_endpoint']}{path}"
+
+    async with client.get(
+        url,
+        cookies={"RUNNER_SESSION_ID": runner_id},
+        params=signature,
+        ssl=ssl_context,
+    ) as upstream:
+        body = await upstream.read()
+
+        # aiohttp.web.Response(content_type=...) cannot include charset
+        ctype = upstream.headers.get("Content-Type", "application/json")
+        if ";" in ctype:
+            ctype = ctype.split(";", 1)[0].strip()
+
+        return aiohttp.web.Response(
+            status=upstream.status,
+            body=body,
+            content_type=ctype,
+        )
+
+
+async def swift_replicate_cancel(request: aiohttp.web.Request) -> aiohttp.web.Response:
+    """Proxy replication cancel from UI to upload-runner."""
+    session = await aiohttp_session.get_session(request)
+    request.app["Log"].info(
+        "API call for replication cancel from "
+        f"{request.remote}, sess: {session} :: {time.ctime()}"
+    )
+
+    project = request.query.get("project") or request.query.get("runner_project")
+    if not project:
+        raise aiohttp.web.HTTPBadRequest(reason="Missing ?project=<projectId>")
+
+    runner_id = await open_upload_runner_session(request, project=project)
+
+    job_id = request.match_info["job_id"]
+    path = f"/replicate/cancel/{job_id}"
+    signature = await sign(3600, path)
+
+    client = request.app["api_client"]
+    url = f"{setd['upload_internal_endpoint']}{path}"
+
+    async with client.post(
+        url,
+        cookies={"RUNNER_SESSION_ID": runner_id},
+        params=signature,
+        ssl=ssl_context,
+    ) as upstream:
+        body = await upstream.read()
+
+        ctype = upstream.headers.get("Content-Type", "application/json")
+        if ";" in ctype:
+            ctype = ctype.split(";", 1)[0].strip()
+
+        return aiohttp.web.Response(
+            status=upstream.status,
+            body=body,
+            content_type=ctype,
+        )
+
+
 async def get_upload_session(
     request: aiohttp.web.Request,
 ) -> aiohttp.web.Response:
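Both proxy handlers follow the same shape: resolve an upload-runner session for the project, sign the internal path, forward the request, and relay the status, body, and a charset-free content type back to the browser. As a rough illustration of how a client of the UI API could drive these endpoints, here is a minimal polling sketch; the base URL and the helper names (poll_replication, cancel_replication) are illustrative only, not part of this commit:

import asyncio

import aiohttp

BASE = "https://ui.example.org"  # hypothetical UI origin, not from this commit


async def poll_replication(
    session: aiohttp.ClientSession, job_id: str, project: str
) -> dict:
    """Poll the status proxy until the job leaves the running state."""
    while True:
        async with session.get(
            f"{BASE}/replicate/status/{job_id}", params={"project": project}
        ) as resp:
            resp.raise_for_status()
            status = await resp.json()
        if status["state"] != "running":
            return status
        await asyncio.sleep(2)  # simple fixed poll interval


async def cancel_replication(
    session: aiohttp.ClientSession, job_id: str, project: str
) -> None:
    """Request cancellation of a running replication job."""
    async with session.post(
        f"{BASE}/replicate/cancel/{job_id}", params={"project": project}
    ) as resp:
        resp.raise_for_status()

A fixed two-second interval keeps the sketch simple; a real client would likely back off between polls instead.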

swift_browser_ui/ui/server.py

Lines changed: 4 additions & 0 deletions
@@ -41,7 +41,9 @@
     swift_list_containers,
     swift_list_objects,
     swift_put_object,
+    swift_replicate_cancel,
     swift_replicate_container,
+    swift_replicate_status,
     swift_update_container_metadata,
 )
 from swift_browser_ui.ui.discover import handle_discover
@@ -305,6 +307,8 @@ async def on_prepare(
             aiohttp.web.post(
                 "/replicate/{project}/{container}", swift_replicate_container
             ),
+            aiohttp.web.get("/replicate/status/{job_id}", swift_replicate_status),
+            aiohttp.web.post("/replicate/cancel/{job_id}", swift_replicate_cancel),
         ]
     )

swift_browser_ui/upload/api.py

Lines changed: 59 additions & 2 deletions
@@ -5,6 +5,7 @@
 import os
 import time
 import typing
+import uuid
 
 import aiohttp.web
 import msgpack
@@ -98,9 +99,33 @@ async def handle_replicate_container(
     await replicator.a_ensure_container()
     await replicator.a_ensure_container(segmented=True)
 
-    asyncio.ensure_future(replicator.a_copy_from_container())
+    job_id = uuid.uuid4().hex[:12]
 
-    return aiohttp.web.Response(status=202)
+    # create job record
+    request.app["replication_jobs"][job_id] = {
+        "state": "running",  # running | finished | failed | cancelled
+        "done": 0,
+        "total": 0,
+        "error": "",
+        "cancel": False,
+    }
+
+    async def runner():
+        try:
+            await replicator.a_copy_from_container(job_id=job_id, app=request.app)
+            request.app["replication_jobs"][job_id]["state"] = "finished"
+        except asyncio.CancelledError:
+            request.app["replication_jobs"][job_id]["state"] = "cancelled"
+            return
+        except Exception as e:
+            request.app["replication_jobs"][job_id]["state"] = "failed"
+            request.app["replication_jobs"][job_id]["error"] = str(e)
+            return
+
+    task = asyncio.create_task(runner())
+    request.app["replication_jobs"][job_id]["task"] = task
+
+    return aiohttp.web.json_response({"job_id": job_id}, status=202)
 
 
 async def handle_replicate_object(request: aiohttp.web.Request) -> aiohttp.web.Response:
@@ -138,6 +163,38 @@ async def handle_replicate_object(request: aiohttp.web.Request) -> aiohttp.web.Response:
     return aiohttp.web.Response(status=202)
 
 
+async def handle_replicate_status(request):
+    """Handle a request for getting replication job status."""
+    job_id = request.match_info["job_id"]
+    job = request.app["replication_jobs"].get(job_id)
+    if not job:
+        raise aiohttp.web.HTTPNotFound(reason="Job not found")
+
+    return aiohttp.web.json_response(
+        {
+            "state": job["state"],
+            "done": job["done"],
+            "total": job["total"],
+            "error": job.get("error", ""),
+        }
+    )
+
+
+async def handle_replicate_cancel(request):
+    """Handle a request for cancelling a replication job."""
+    job_id = request.match_info["job_id"]
+    job = request.app["replication_jobs"].get(job_id)
+    if not job:
+        raise aiohttp.web.HTTPNotFound(reason="Job not found")
+
+    job["cancel"] = True
+    task = job.get("task")
+    if task and not task.done():
+        task.cancel()
+
+    return aiohttp.web.json_response({"ok": True})
+
+
 async def handle_post_object_chunk(request: aiohttp.web.Request) -> aiohttp.web.Response:
     """Handle a request for posting an object chunk."""
     if "from_object" in request.query.keys():

swift_browser_ui/upload/replicate.py

Lines changed: 85 additions & 25 deletions
@@ -1,5 +1,6 @@
 """Container and object replication handlers using aiohttp."""
 
+import asyncio
 import logging
 import os
 import ssl
@@ -26,6 +27,11 @@
 REPL_TIMEOUT = 16384
 
 
+def _is_cancelled(app, job_id: str) -> bool:
+    job = app["replication_jobs"].get(job_id) if app else None
+    return (job is None) or job.get("cancel", False)
+
+
 class ObjectReplicationProxy:
     """A class for replicating objects."""
 
@@ -90,8 +96,11 @@ async def a_ensure_container(self, segmented: bool = False) -> None:
 
         LOGGER.info(f"Created container '{container}'.")
 
-    async def a_sync_object_segments(self, manifest: str) -> str:
+    async def a_sync_object_segments(self, manifest: str, job_id: str, app) -> str:
         """Get object segments."""
+        if _is_cancelled(app, job_id):
+            raise asyncio.CancelledError()
+
         async with self.client.get(
             common.generate_download_url(
                 self.source_host, container=manifest.split("/")[0]
@@ -123,6 +132,8 @@ def filter_with_prefix(segment: str) -> bool:
         LOGGER.debug(f"Got following segments: {segments}")
 
         for segment in segments:
+            if _is_cancelled(app, job_id):
+                raise asyncio.CancelledError()
             from_url = common.generate_download_url(
                 self.source_host, container=manifest.split("/")[0], object_name=segment
             )
@@ -152,6 +163,8 @@ def filter_with_prefix(segment: str) -> bool:
                     reason="ETag missing, maybe segments file empty"
                 )
 
+            if _is_cancelled(app, job_id):
+                raise asyncio.CancelledError()
             to_url = common.generate_download_url(
                 self.host, container=f"{self.container}_segments", object_name=segment
             )
@@ -175,8 +188,10 @@ def filter_with_prefix(segment: str) -> bool:
             )
         return new_manifest
 
-    async def a_copy_object(self, object_name: str) -> None:
+    async def a_copy_object(self, object_name: str, job_id: str, app) -> None:
         """Copy an object from a location."""
+        if _is_cancelled(app, job_id):
+            raise asyncio.CancelledError()
         # Get the object stream handle
         async with self.client.get(
             common.generate_download_url(
@@ -216,6 +231,8 @@ async def a_copy_object(self, object_name: str) -> None:
                 raise aiohttp.web.HTTPUnprocessableEntity(
                     reason="ETag missing, maybe segments file empty"
                 )
+            if _is_cancelled(app, job_id):
+                raise asyncio.CancelledError()
             async with self.client.put(
                 common.generate_download_url(self.host, self.container, object_name),
                 data=resp_g.content.iter_chunked(65564),
@@ -235,13 +252,20 @@ async def a_copy_object(self, object_name: str) -> None:
             # segmented upload
             LOGGER.debug(f"Copying object {object_name} in segments.")
 
+            if _is_cancelled(app, job_id):
+                raise asyncio.CancelledError()
             manifest = await self.a_sync_object_segments(
-                resp_g.headers["X-Object-Manifest"]
+                resp_g.headers["X-Object-Manifest"],
+                job_id=job_id,
+                app=app,
             )
 
             LOGGER.debug("Uploading manifest")
             # Add manifest headers
            headers["X-Object-Manifest"] = manifest
+
+            if _is_cancelled(app, job_id):
+                raise asyncio.CancelledError()
             # Create manifest file
             async with self.client.put(
                 common.generate_download_url(
@@ -262,44 +286,80 @@ async def a_copy_single_object(self, object_name: str) -> None:
         """Only copy a single object."""
         await self.a_copy_object(object_name)
 
-    async def a_get_container_page(self, marker: str = "") -> list[str]:
-        """Get a single page of objects from a container."""
+    async def a_get_container_count(self) -> int:
+        """Get total object count in a container."""
+        async with self.client.head(
+            common.generate_download_url(
+                self.source_host, container=self.source_container
+            ),
+            headers={"X-Auth-Token": self.token},
+            timeout=ClientTimeout(total=REPL_TIMEOUT),
+            ssl=ssl_context,
+        ) as resp:
+            if resp.status >= 400:
+                raise aiohttp.web.HTTPBadRequest(
+                    reason="Could not HEAD source container."
+                )
+
+            try:
+                return int(resp.headers.get("X-Container-Object-Count", "0"))
+            except ValueError:
+                return 0
+
+    async def a_get_container_page(
+        self, marker: str = "", limit: int = 10000
+    ) -> list[str]:
+        """Get one page of object names from a container."""
+        params = {"limit": str(limit)}
+        if marker:
+            params["marker"] = marker
+
         async with self.client.get(
             common.generate_download_url(
-                self.source_host,
-                container=self.source_container,
+                self.source_host, container=self.source_container
             ),
             headers={"X-Auth-Token": self.token},
-            params={"marker": marker} if marker else None,
+            params=params,
             timeout=ClientTimeout(total=REPL_TIMEOUT),
             ssl=ssl_context,
         ) as resp:
            if resp.status >= 400:
-                LOGGER.debug(f"Container fetch failed with status {resp.status}")
                 raise aiohttp.web.HTTPBadRequest(
                     reason="Could not fetch source container."
                 )
 
             if resp.status == 200:
-                ret = await resp.text()
-                return ret.rstrip().lstrip().split("\n")
+                text = await resp.text()
+                return [x for x in text.strip().split("\n") if x]
 
             return []
 
-    async def a_copy_from_container(self) -> None:
-        """Copy objects from a source container."""
+    async def a_copy_from_container(self, job_id: str, app) -> None:
+        """Copy objects from a source container with live progress + cancel."""
         LOGGER.debug(f"Fetching objects from container {self.source_container}")
-        container_url = common.generate_download_url(
-            self.source_host, container=self.source_container
-        )
-        LOGGER.debug(f"Container url: {container_url}")
 
-        # Page through all the objects in a container
-        to_copy: list[str] = []
-        page = await self.a_get_container_page()
-        while page:
-            to_copy = to_copy + page
-            page = await self.a_get_container_page(to_copy[-1])
+        job = app["replication_jobs"].get(job_id)
+        if job is not None:
+            job["total"] = await self.a_get_container_count()
+            job["done"] = 0
+
+        marker = ""
+
+        while True:
+            job = app["replication_jobs"].get(job_id)
+            if job is None or job.get("cancel"):
+                raise asyncio.CancelledError()
+
+            page = await self.a_get_container_page(marker=marker)
+            if not page:
+                break
+
+            for obj in page:
+                job = app["replication_jobs"].get(job_id)
+                if job is None or job.get("cancel"):
+                    raise asyncio.CancelledError()
+
+                await self.a_copy_object(obj, job_id=job_id, app=app)
 
-        for obj in to_copy:
-            await self.a_copy_object(obj)
+                job["done"] += 1
+            marker = page[-1]
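Cancellation here is cooperative on two levels: handle_replicate_cancel flips the job's cancel flag and calls task.cancel(), while the copy coroutines poll _is_cancelled() between network operations so a long segmented copy stops at the next boundary instead of only at the next await point. A condensed, self-contained sketch of the pattern (names illustrative, not the project's API):

import asyncio


async def cancellable_loop(job: dict, items: list) -> None:
    """Process items while honouring both task.cancel() and a shared flag."""
    for _ in items:
        if job.get("cancel"):
            # Raising CancelledError makes the flag path and task.cancel()
            # indistinguishable to the caller awaiting the task.
            raise asyncio.CancelledError()
        await asyncio.sleep(0)  # stands in for one network copy step
        job["done"] += 1


async def main() -> None:
    job = {"cancel": False, "done": 0}
    task = asyncio.create_task(cancellable_loop(job, ["a", "b", "c"]))
    job["cancel"] = True  # request cancellation before the loop progresses
    try:
        await task
    except asyncio.CancelledError:
        print(f"cancelled after {job['done']} objects")


asyncio.run(main())

Since CancelledError subclasses BaseException on modern Python, the broad except Exception in the runner wrapper does not swallow it, which is why a cancelled job ends in the cancelled state rather than failed.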
