Skip to content

Commit

Permalink
Merge pull request #4386 from janhq/chore/add-cpu-threads-cortex-extension-settings
Browse files Browse the repository at this point in the history

chore: add cpu_threads settings in cortex extension
  • Loading branch information
louis-jan authored Jan 2, 2025
2 parents 502bd92 + 27e40c3 commit 5127df4
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
2 changes: 1 addition & 1 deletion extensions/inference-cortex-extension/bin/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.7
1.0.8-rc1
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@
"placeholder": "4"
}
},
{
"key": "cpu_threads",
"title": "CPU Threads",
"description": "The number of threads to use for inferencing (CPU MODE ONLY)",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "4"
}
},
{
"key": "flash_attn",
"title": "Flash Attention enabled",
Expand Down
10 changes: 10 additions & 0 deletions extensions/inference-cortex-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export enum Settings {
flash_attn = 'flash_attn',
cache_type = 'cache_type',
use_mmap = 'use_mmap',
cpu_threads = 'cpu_threads',
}

/**
Expand All @@ -66,6 +67,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
flash_attn: boolean = true
use_mmap: boolean = true
cache_type: string = 'f16'
cpu_threads?: number

/**
* The URL for making inference requests.
Expand Down Expand Up @@ -105,6 +107,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
this.cache_type = await this.getSetting<string>(Settings.cache_type, 'f16')
const threads_number = Number(
await this.getSetting<string>(Settings.cpu_threads, '')
)
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

this.queue.add(() => this.clean())

Expand Down Expand Up @@ -150,6 +156,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
this.cache_type = value as string
} else if (key === Settings.use_mmap && typeof value === 'boolean') {
this.use_mmap = value as boolean
} else if (key === Settings.cpu_threads && typeof value === 'string') {
const threads_number = Number(value)
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
}
}

Expand Down Expand Up @@ -207,6 +216,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
flash_attn: this.flash_attn,
cache_type: this.cache_type,
use_mmap: this.use_mmap,
...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
},
timeout: false,
signal,
Expand Down

0 comments on commit 5127df4

Please sign in to comment.