Skip to content

Commit

Permalink
Force updating the job status to KILLED when killing a job that has a…
Browse files Browse the repository at this point in the history
… connected agent but no response observer
  • Loading branch information
bhou committed Oct 24, 2023
1 parent db4262a commit 77cf7e6
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ public void killJob(
if (responseObserver == null) {
// This might happen when the agent has gone but its status is not updated
// In this case, we force updating the job status to KILLED.
log.warn("Job {} not killed. Expected local agent connection not found. "
log.warn("Tried to kill Job {}, but expected local agent connection not found. "
+ "Trying to force updating the job status to {}",
jobId,
JobStatus.KILLED
Expand All @@ -167,15 +167,22 @@ public void killJob(
jobId,
JobStatus.KILLED
);
} catch (final Exception e) {
log.error("Succeeded to force updating the status of Job {} to {}",
} catch (final GenieInvalidStatusException e) {
log.error(
"Failed to force updating the status of Job {} to {} "
+ "due to current status not being expected {}",
jobId,
JobStatus.KILLED
JobStatus.KILLED,
currentJobStatus
);
throw new GenieServerException("Failed to force updating the status of Job "
+ jobId + " to " + JobStatus.KILLED,
e
throw e;
} catch (final NotFoundException e) {
log.error(
"Failed to force updating the status of Job {} to {} due to job not found",
jobId,
JobStatus.KILLED
);
throw new GenieJobNotFoundException(e);
}
} else {
responseObserver.onNext(JobKillRegistrationResponse.newBuilder().build());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,42 @@ class GRpcJobKillServiceImplSpec extends Specification {
0 * this.agentRoutingService.isAgentConnectionLocal(this.jobId)
noExceptionThrown()

when: "The job is active, the agent is connected, the job is local but no observer"
when: "The job is active, the agent is connected, the job is local but no observer, and force updating job status succeeded"
this.serviceSpy.killJob(this.jobId, this.reason, this.servletRequest)

then: "Force updating job status"
then: "The database is updated and no exception is thrown"
1 * this.persistenceService.getJobStatus(this.jobId) >> JobStatus.CLAIMED
1 * this.persistenceService.updateJobStatus(_ as String, _ as JobStatus, _ as JobStatus, _ as String)
1 * this.agentRoutingService.isAgentConnectionLocal(this.jobId) >> true
0 * this.responseObserver.onNext(_ as JobKillRegistrationResponse)
0 * this.responseObserver.onCompleted()
noExceptionThrown()

when: "The job is active, the agent is connected, the job is local but no observer, and current job status is invalid for updating"
this.serviceSpy.killJob(this.jobId, this.reason, this.servletRequest)

then: "The database is not updated and GenieInvalidStatusException is thrown"
1 * this.persistenceService.getJobStatus(this.jobId) >> JobStatus.CLAIMED
1 * this.persistenceService.updateJobStatus(this.jobId, JobStatus.CLAIMED, JobStatus.KILLED, this.reason) >> {
throw new GenieInvalidStatusException()
}
1 * this.agentRoutingService.isAgentConnectionLocal(this.jobId) >> true
0 * this.responseObserver.onNext(_ as JobKillRegistrationResponse)
0 * this.responseObserver.onCompleted()
thrown(GenieInvalidStatusException)

when: "The job is active, the agent is connected, the job is local but no observer, and the job is not found"
this.serviceSpy.killJob(this.jobId, this.reason, this.servletRequest)

then: "The database is not updated and GenieJobNotFoundException is thrown"
1 * this.persistenceService.getJobStatus(this.jobId) >> JobStatus.CLAIMED
1 * this.persistenceService.updateJobStatus(this.jobId, JobStatus.CLAIMED, JobStatus.KILLED, this.reason) >> {
throw new NotFoundException()
}
1 * this.agentRoutingService.isAgentConnectionLocal(this.jobId) >> true
0 * this.responseObserver.onNext(_ as JobKillRegistrationResponse)
0 * this.responseObserver.onCompleted()
thrown(GenieJobNotFoundException)

when: "The job is active, the agent is connected, and there is an observer"
this.serviceSpy.registerForKillNotification(this.request, this.responseObserver)
Expand Down

0 comments on commit 77cf7e6

Please sign in to comment.