From 27e40c35d895aa858e407f7b7f8f0b1706cfd2aa Mon Sep 17 00:00:00 2001
From: Louis
Date: Thu, 2 Jan 2025 13:03:16 +0700
Subject: [PATCH] chore: add cpu_threads settings in cortex extension

---
 extensions/inference-cortex-extension/bin/version.txt |  2 +-
 .../resources/default_settings.json                   | 10 ++++++++++
 extensions/inference-cortex-extension/src/index.ts    | 10 ++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index 238d6e882a..3c25d39659 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.7
+1.0.8-rc1
diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json
index 1e5ec8db68..31586fbe64 100644
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
@@ -18,6 +18,16 @@
       "placeholder": "4"
     }
   },
+  {
+    "key": "cpu_threads",
+    "title": "CPU Threads",
+    "description": "The number of threads to use for inferencing (CPU MODE ONLY)",
+    "controllerType": "input",
+    "controllerProps": {
+      "value": "",
+      "placeholder": "4"
+    }
+  },
   {
     "key": "flash_attn",
     "title": "Flash Attention enabled",
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 4cc322436d..5ba28ac57e 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -43,6 +43,7 @@ export enum Settings {
   flash_attn = 'flash_attn',
   cache_type = 'cache_type',
   use_mmap = 'use_mmap',
+  cpu_threads = 'cpu_threads',
 }
 
 /**
@@ -66,6 +67,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   flash_attn: boolean = true
   use_mmap: boolean = true
   cache_type: string = 'f16'
+  cpu_threads?: number
 
   /**
    * The URL for making inference requests.
@@ -105,6 +107,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     this.flash_attn = await this.getSetting(Settings.flash_attn, true)
     this.use_mmap = await this.getSetting(Settings.use_mmap, true)
     this.cache_type = await this.getSetting(Settings.cache_type, 'f16')
+    const threads_number = Number(
+      await this.getSetting(Settings.cpu_threads, '')
+    )
+    if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
 
     this.queue.add(() => this.clean())
 
@@ -150,6 +156,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       this.cache_type = value as string
     } else if (key === Settings.use_mmap && typeof value === 'boolean') {
       this.use_mmap = value as boolean
+    } else if (key === Settings.cpu_threads && typeof value === 'string') {
+      const threads_number = Number(value)
+      if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
     }
   }
 
@@ -207,6 +216,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         flash_attn: this.flash_attn,
         cache_type: this.cache_type,
         use_mmap: this.use_mmap,
+        ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
       },
       timeout: false,
       signal,
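
Note (not part of the patch): a minimal standalone TypeScript sketch of the parse-and-forward behavior the diff above introduces; the helper names parseCpuThreads and cpuThreadsSetting are illustrative only and do not exist in the extension.

    // Standalone sketch mirroring the logic added in src/index.ts.
    function parseCpuThreads(raw: string): number | undefined {
      const threads = Number(raw) // Number('') === 0, Number('abc') is NaN
      return Number.isNaN(threads) ? undefined : threads
    }

    function cpuThreadsSetting(cpuThreads?: number): { cpu_threads?: number } {
      // A falsy value (undefined or 0) omits cpu_threads from the request body,
      // matching `...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {})`.
      return cpuThreads ? { cpu_threads: cpuThreads } : {}
    }

    console.log(cpuThreadsSetting(parseCpuThreads('')))  // {}  (empty default is not sent)
    console.log(cpuThreadsSetting(parseCpuThreads('8'))) // { cpu_threads: 8 }

In short, the empty-string default parses to 0, which the spread guard treats as "unset", so cpu_threads only reaches the model-load request when the user enters a non-zero number.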