Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt to re-establish session connection after unexpected disconnect #5832

Merged
merged 4 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions extensions/positron-supervisor/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@
"category": "%command.positron.supervisor.category%",
"title": "%command.showKernelSupervisorLog.title%",
"shortTitle": "%command.showKernelSupervisorLog.title%"
},
{
"command": "positron.supervisor.reconnectSession",
"category": "%command.positron.supervisor.category%",
"title": "%command.reconnectSession.title%",
"shortTitle": "%command.reconnectSession.title%",
"enablement": "isDevelopment"
}
]
},
Expand Down
3 changes: 2 additions & 1 deletion extensions/positron-supervisor/package.nls.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
"configuration.attachOnStartup.description": "Run <f5> before starting up Jupyter kernel (when supported)",
"configuration.sleepOnStartup.description": "Sleep for n seconds before starting up Jupyter kernel (when supported)",
"command.positron.supervisor.category": "Kernel Supervisor",
"command.showKernelSupervisorLog.title": "Show the Kernel Supervisor Log"
"command.showKernelSupervisorLog.title": "Show the Kernel Supervisor Log",
"command.reconnectSession.title": "Reconnect the current session"
}
80 changes: 68 additions & 12 deletions extensions/positron-supervisor/src/KallichoreAdapterApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { findAvailablePort } from './PortFinder';
import { KallichoreAdapterApi } from './positron-supervisor';
import { JupyterKernelExtra, JupyterKernelSpec, JupyterLanguageRuntimeSession } from './jupyter-adapter';
import { KallichoreSession } from './KallichoreSession';
import { Barrier, PromiseHandles } from './async';
import { Barrier, PromiseHandles, withTimeout } from './async';
import { LogStreamer } from './LogStreamer';
import { createUniqueId, summarizeHttpError } from './util';

Expand Down Expand Up @@ -93,6 +93,10 @@ export class KCApi implements KallichoreAdapterApi {
this._log.appendLine(`Failed to start Kallichore server: ${err}`);
});
}

_context.subscriptions.push(vscode.commands.registerCommand('positron.supervisor.reconnectSession', () => {
this.reconnectActiveSession();
}));
}

/**
Expand Down Expand Up @@ -216,7 +220,7 @@ export class KCApi implements KallichoreAdapterApi {

// Start the server in a new terminal
this._log.appendLine(`Starting Kallichore server ${shellPath} on port ${port}`);
const terminal = vscode.window.createTerminal(<vscode.TerminalOptions>{
const terminal = vscode.window.createTerminal({
name: 'Kallichore',
shellPath: wrapperPath,
shellArgs: [
Expand All @@ -231,7 +235,7 @@ export class KCApi implements KallichoreAdapterApi {
message: `*** Kallichore Server (${shellPath}) ***`,
hideFromUser: !showTerminal,
isTransient: false
});
} satisfies vscode.TerminalOptions);

// Flag to track if the terminal exited before the start barrier opened
let exited = false;
Expand Down Expand Up @@ -479,15 +483,31 @@ export class KCApi implements KallichoreAdapterApi {
* @param session The session to add the disconnect handler to
*/
private addDisconnectHandler(session: KallichoreSession) {
session.disconnected.event(async (state: positron.RuntimeState) => {
this._disposables.push(session.disconnected.event(async (state: positron.RuntimeState) => {
if (state !== positron.RuntimeState.Exited) {
// The websocket disconnected while the session was still
// running. This could signal a problem with the supervisor; we
// should see if it's still running.
this._log.appendLine(`Session '${session.metadata.sessionName}' disconnected while in state '${state}'. This is unexpected; checking server status.`);
await this.testServerExited();

// If the server did not exit, and the session also appears to
// still be running, try to reconnect the websocket. It's
// possible the connection just got dropped or interrupted.
const exited = await this.testServerExited();
if (!exited) {
this._log.appendLine(`The server is still running; attempting to reconnect to session ${session.metadata.sessionId}`);
try {
await withTimeout(session.connect(), 2000, `Timed out reconnecting to session ${session.metadata.sessionId}`);
this._log.appendLine(`Successfully restored connection to ${session.metadata.sessionId}`);
} catch (err) {
// The session could not be reconnected; mark it as
// offline and explain to the user what happened.
session.markOffline('Lost connection to the session WebSocket event stream and could not restore it: ' + err);
vscode.window.showErrorMessage(vscode.l10n.t('Unable to re-establish connection to {0}: {1}', session.metadata.sessionName, err));
}
}
}
});
}));
}

/**
Expand All @@ -500,14 +520,16 @@ export class KCApi implements KallichoreAdapterApi {
* handle the case where the server process is running but it's become
* unresponsive.
*
* @returns A promise that resolves when the server has been confirmed to be
* running or has been restarted.
* @returns A promise that resolves when the server has been confirmed to
* be running or has been restarted. Resolves with `true` if the server did
* in fact exit, `false` otherwise.
*/
private async testServerExited() {
private async testServerExited(): Promise<boolean> {
// If we're currently starting, it doesn't make sense to test the
// server status since we're already in the process of starting it.
if (this._starting) {
return this._starting.promise;
await this._starting.promise;
return false;
}

// Load the server state so we can check the process ID
Expand All @@ -517,7 +539,7 @@ export class KCApi implements KallichoreAdapterApi {
// If there's no server state, return as we can't check its status
if (!serverState) {
this._log.appendLine(`No Kallichore server state found; cannot test server process`);
return;
return false;
}

// Test the process ID to see if the server is still running.
Expand All @@ -536,7 +558,7 @@ export class KCApi implements KallichoreAdapterApi {

// The server is still running; nothing to do
if (serverRunning) {
return;
return false;
}

// Clean up the state so we don't try to reconnect to a server that
Expand Down Expand Up @@ -568,6 +590,9 @@ export class KCApi implements KallichoreAdapterApi {
vscode.window.showInformationMessage(
vscode.l10n.t('The process supervising the interpreters has exited unexpectedly and could not automatically restarted: ' + err));
}

// The server did exit.
return true;
}

/**
Expand Down Expand Up @@ -705,4 +730,35 @@ export class KCApi implements KallichoreAdapterApi {

throw new Error(`Kallichore server not found (expected at ${embeddedBinary})`);
}

/**
* Reconnects to the active session, if one exists. Primarily useful as a
* troubleshooting tool.
*/
async reconnectActiveSession() {
// Get the foreground session from the Positron API
const session = await positron.runtime.getForegroundSession();
if (!session) {
vscode.window.showInformationMessage(vscode.l10n.t('No active session to reconnect to'));
return;
}

// Find the session in our list
const kallichoreSession = this._sessions.find(s => s.metadata.sessionId === session.metadata.sessionId);
if (!kallichoreSession) {
vscode.window.showInformationMessage(vscode.l10n.t('Active session {0} not managed by the kernel supervisor', session.metadata.sessionName));
return;
}

// Ensure the session is still active
if (kallichoreSession.runtimeState === positron.RuntimeState.Exited) {
vscode.window.showInformationMessage(vscode.l10n.t('Session {0} is not running', session.metadata.sessionName));
return;
}

// Disconnect the session; since the session is active, this should
// trigger a reconnect.
kallichoreSession.log('Disconnecting by user request', vscode.LogLevel.Info);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my understanding, where does this log out to? If a user wanted to find this log output, where would they go? I don't think it goes to the Kernel Supervisor Output

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are correct! It logs to the Console output logs for the session (e.g. Python 3.12.4 Console).

kallichoreSession.disconnect();
}
}
20 changes: 18 additions & 2 deletions extensions/positron-supervisor/src/KallichoreSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -571,8 +571,8 @@ export class KallichoreSession implements JupyterLanguageRuntimeSession {
type === positron.RuntimeClientType.IPyWidgetControl) {

const msg: JupyterCommOpen = {
target_name: type, // eslint-disable-line
comm_id: id, // eslint-disable-line
target_name: type,
comm_id: id,
data: params
};
const commOpen = new CommOpenCommand(msg, metadata);
Expand Down Expand Up @@ -1044,6 +1044,13 @@ export class KallichoreSession implements JupyterLanguageRuntimeSession {
this._disposables = [];
}

/**
* Disconnect the session
*/
public disconnect() {
this._socket?.ws.close();
}

/**
* Main entry point for handling messages delivered over the websocket from
* the Kallichore server.
Expand Down Expand Up @@ -1165,6 +1172,15 @@ export class KallichoreSession implements JupyterLanguageRuntimeSession {
this.onExited(exitCode);
}

/**
* Marks the kernel as offline.
*
* @param reason The reason for the kernel going offline
*/
markOffline(reason: string) {
this.onStateChange(positron.RuntimeState.Offline, reason);
}

private onExited(exitCode: number) {
if (this._restarting) {
// If we're restarting, wait for the kernel to start up again
Expand Down
Loading