Skip to content

Commit

Permalink
Force updating the job status to KILLED when killing a job that has a…
Browse files Browse the repository at this point in the history
… connected agent but no response observer
  • Loading branch information
bhou committed Oct 21, 2023
1 parent c6c81dc commit 7606466
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,35 @@ public void killJob(
= this.parkedJobKillResponseObservers.remove(jobId);

if (responseObserver == null) {
log.error("Job {} not killed. Expected local agent connection not found", jobId);
throw new GenieServerException(
"Job " + jobId + " not killed. Expected local agent connection not found."
// This can happen when the agent is gone but the job's status was never updated.
// In that case, force the job status to KILLED.
log.warn("Tried to kill Job {}, but expected local agent connection not found. "
+ "Trying to force updating the job status to {}",
jobId,
JobStatus.KILLED
);
}
responseObserver.onNext(JobKillRegistrationResponse.newBuilder().build());
responseObserver.onCompleted();
try {
this.persistenceService.updateJobStatus(jobId, currentJobStatus, JobStatus.KILLED, reason);
log.info("Succeeded to force updating the status of Job {} to {}",
jobId,
JobStatus.KILLED
);
} catch (final Exception e) {
log.error("Failed to force updating the status of Job {} to {}",
jobId,
JobStatus.KILLED
);
throw new GenieServerException("Failed to force updating the status of Job "
+ jobId + " to " + JobStatus.KILLED,
e
);
}
} else {
responseObserver.onNext(JobKillRegistrationResponse.newBuilder().build());
responseObserver.onCompleted();

log.info("Agent notified for killing job {}", jobId);
log.info("Agent notified for killing job {}", jobId);
}
} else {
// Agent is running somewhere else try to forward the request
final String hostname = this.agentRoutingService
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,25 @@ class GRpcJobKillServiceImplSpec extends Specification {
0 * this.agentRoutingService.isAgentConnectionLocal(this.jobId)
noExceptionThrown()

when: "The job is active, the agent is connected, the job is local but no observer"
when: "The job is active, the agent is connected, the job is local but no observer, and Force updating job status succeeded"
this.serviceSpy.killJob(this.jobId, this.reason, this.servletRequest)

then: "Correct exception is thrown"
then: "The database is updated and no exception is thrown"
1 * this.persistenceService.getJobStatus(this.jobId) >> JobStatus.CLAIMED
0 * this.persistenceService.updateJobStatus(_ as String, _ as JobStatus, _ as JobStatus, _ as String)
1 * this.persistenceService.updateJobStatus(_ as String, _ as JobStatus, _ as JobStatus, _ as String)
1 * this.agentRoutingService.isAgentConnectionLocal(this.jobId) >> true
0 * this.responseObserver.onNext(_ as JobKillRegistrationResponse)
0 * this.responseObserver.onCompleted()
noExceptionThrown()

when: "The job is active, the agent is connected, the job is local but no observer, and Force updating job status failed"
this.serviceSpy.killJob(this.jobId, this.reason, this.servletRequest)

then: "The database is not updated and exception is thrown"
1 * this.persistenceService.getJobStatus(this.jobId) >> JobStatus.CLAIMED
1 * this.persistenceService.updateJobStatus(this.jobId, JobStatus.CLAIMED, JobStatus.KILLED, this.reason) >> {
throw new Exception("whoops")
}
1 * this.agentRoutingService.isAgentConnectionLocal(this.jobId) >> true
0 * this.responseObserver.onNext(_ as JobKillRegistrationResponse)
0 * this.responseObserver.onCompleted()
Expand Down

0 comments on commit 7606466

Please sign in to comment.