->[!Warning]
->**Jan is currently in Development**: Expect breaking changes and bugs!
+> [!Warning]
+> **Jan is currently in Development**: Expect breaking changes and bugs!
Jan is an open-source ChatGPT alternative that runs 100% offline on your computer.
@@ -45,31 +44,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
+ Customize Jan to match your needs and preferences.
+
+
+
+
+
+
+
+
+
Assistants & Memory
+
+
+ Coming Soon
+
+
+
+
+ Create personalized AI assistants that remember your conversations
+ and execute specific tasks across your systems.
+
+
+
+
+
+
+
+
+
+
Extensions
+
+ Customize Jan with Extensions, which range from cloud AI
+ connectors to tools and data connectors.
+
+
+
+
+
+
+
+
+
+ )
+}
+
+export default Customizable
diff --git a/docs/src/components/Home/Feature/index.tsx b/docs/src/components/Home/Feature/index.tsx
new file mode 100644
index 0000000000..38c7c8fadf
--- /dev/null
+++ b/docs/src/components/Home/Feature/index.tsx
@@ -0,0 +1,205 @@
+import ThemeImage from '@/components/ThemeImage'
+import { useState } from 'react'
+// import { BsArrowRight } from 'react-icons/bs'
+import { twMerge } from 'tailwind-merge'
+
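+// Homepage feature cards; `experimental` flags features that are not yet stable.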
+const features = [
+ {
+ title: 'Chat with AI',
+ experimental: false,
+ description:
+ 'Ask your questions, brainstorm, and learn from the AI running on your device to be more productive.',
+ image: {
+ light: '/assets/images/homepage/features01.png',
+ dark: '/assets/images/homepage/features01dark.png',
+ },
+ },
+ {
+ title: 'Model Hub',
+ experimental: false,
+ description: `Download and run powerful models like Llama3, Gemma, or Mistral on your computer.`,
+ image: {
+ light: '/assets/images/homepage/features02.png',
+ dark: '/assets/images/homepage/features02dark.png',
+ },
+ },
+ {
+ title: 'Connect to Cloud AIs',
+ experimental: false,
+ description: `You can also route to more powerful cloud models from providers like OpenAI, Groq, and Cohere when needed.`,
+ image: {
+ light: '/assets/images/homepage/features03.png',
+ dark: '/assets/images/homepage/features03dark.png',
+ },
+ },
+ {
+ title: 'Local API Server',
+ experimental: false,
+ description: `Set up and run your own OpenAI-compatible API server using local models with just one click.`,
+ image: {
+ light: '/assets/images/homepage/features04.png',
+ dark: '/assets/images/homepage/features04dark.png',
+ },
+ },
+ {
+ title: 'Chat with your files',
+ experimental: true,
+ description: `Chat with your own documents and files using the AI running on your device.`,
+ image: {
+ light: '/assets/images/homepage/features05.png',
+ dark: '/assets/images/homepage/features05dark.png',
+ },
+ },
+]
+
+const Feature = () => {
+ const [activeFeature, setActiveFeature] = useState(0)
+
+ return (
+ <>
+
+ Jan is opinionated software about what AI should be
+
+
+
+
+
+
Local-first
+
+ {`We believe your conversations and files should remain yours
+ alone. That's why we prioritize local-first AI, running
+ open-source models directly on your computer.`}
+
+
+
+
+
User-owned
+
+ Your data, your rules. Jan stores everything on your device in
+ universal formats, giving you total freedom to move your data
+ without tricks or traps.
+
+
+
+
+
+ Fully Customizable
+
+
+ Endlessly customize the experience with third-party extensions,
+ and adjust alignment, moderation, and censorship levels to your
+ needs.
+
+ )
+}
+
+export default SocialShareButton
diff --git a/docs/src/components/ThemeImage/index.tsx b/docs/src/components/ThemeImage/index.tsx
new file mode 100644
index 0000000000..eade6fbbcf
--- /dev/null
+++ b/docs/src/components/ThemeImage/index.tsx
@@ -0,0 +1,45 @@
+import Image, { ImageProps } from 'next/image'
+import { twMerge } from 'tailwind-merge'
+
+type Props = Omit<ImageProps, 'src'> & {
+ source: {
+ light: string
+ dark: string
+ }
+ className?: string
+ alt: string
+ width: number
+ height: number
+ priority?: boolean
+}
+
+const ThemeImage = (props: Props) => {
+ const { source, className, alt, width, height, priority } = props
+
+ return (
+ <>
+
+
+ >
+ )
+}
+
+export default ThemeImage
diff --git a/docs/src/components/ThemeVideo/index.tsx b/docs/src/components/ThemeVideo/index.tsx
new file mode 100644
index 0000000000..f136ef3cd6
--- /dev/null
+++ b/docs/src/components/ThemeVideo/index.tsx
@@ -0,0 +1,41 @@
+import { twMerge } from 'tailwind-merge'
+
+type Props = {
+ source: {
+ light: string
+ dark: string
+ }
+ className?: string
+ width: number
+ height: number
+}
+
+const ThemeVideo = (props: Props) => {
+ const { source, className, width, height } = props
+
+ return (
+ <>
+
+
+
+ >
+ )
+}
+
+export default ThemeVideo
diff --git a/docs/src/helpers/authors.yml b/docs/src/helpers/authors.yml
new file mode 100644
index 0000000000..ec58002e48
--- /dev/null
+++ b/docs/src/helpers/authors.yml
@@ -0,0 +1,76 @@
+dan-jan:
+ name: Daniel Onggunhao
+ title: Co-Founder
+ url: https://github.com/dan-jan
+ image_url: https://avatars.githubusercontent.com/u/101145494?v=4
+ email: daniel@jan.ai
+
+namchuai:
+ name: Nam Nguyen
+ title: Developer
+ url: https://github.com/namchuai
+ image_url: https://avatars.githubusercontent.com/u/10397206?v=4
+ email: james@jan.ai
+
+hiro-v:
+ name: Hiro Vuong
+ title: MLE
+ url: https://github.com/hiro-v
+ image_url: https://avatars.githubusercontent.com/u/22463238?v=4
+ email: hiro@jan.ai
+
+ashley-jan:
+ name: Ashley Tran
+ title: Product Designer
+ url: https://github.com/imtuyethan
+ image_url: https://avatars.githubusercontent.com/u/89722390?v=4
+ email: ashley@jan.ai
+
+hientominh:
+ name: Hien To
+ title: DevOps Engineer
+ url: https://github.com/hientominh
+ image_url: https://avatars.githubusercontent.com/u/37921427?v=4
+ email: hien@jan.ai
+
+Van-QA:
+ name: Van Pham
+ title: QA & Release Manager
+ url: https://github.com/Van-QA
+ image_url: https://avatars.githubusercontent.com/u/64197333?v=4
+ email: van@jan.ai
+
+louis-jan:
+ name: Louis Le
+ title: Software Engineer
+ url: https://github.com/louis-jan
+ image_url: https://avatars.githubusercontent.com/u/133622055?v=4
+ email: louis@jan.ai
+
+hahuyhoang411:
+ name: Rex Ha
+ title: LLM Researcher & Content Writer
+ url: https://github.com/hahuyhoang411
+ image_url: https://avatars.githubusercontent.com/u/64120343?v=4
+ email: rex@jan.ai
+
+automaticcat:
+ name: Alan Dao
+ title: AI Engineer
+ url: https://github.com/tikikun
+ image_url: https://avatars.githubusercontent.com/u/22268502?v=4
+ email: alan@jan.ai
+
+hieu-jan:
+ name: Henry Ho
+ title: Software Engineer
+ url: https://github.com/hieu-jan
+ image_url: https://avatars.githubusercontent.com/u/150573299?v=4
+ email: hieu@jan.ai
+
+0xsage:
+ name: Nicole Zhu
+ title: Co-Founder
+ url: https://github.com/0xsage
+ image_url: https://avatars.githubusercontent.com/u/69952136?v=4
+ email: nicole@jan.ai
diff --git a/docs/src/hooks/useBodyClass.ts b/docs/src/hooks/useBodyClass.ts
new file mode 100644
index 0000000000..e0a4844694
--- /dev/null
+++ b/docs/src/hooks/useBodyClass.ts
@@ -0,0 +1,22 @@
+import { useEffect } from 'react'
+
+const addBodyClass = (className: string) =>
+ document.body.classList.add(className)
+const removeBodyClass = (className: string) =>
+ document.body.classList.remove(className)
+
+export function useBodyClass(className: string | string[]) {
+  useEffect(() => {
+    const classNames = Array.isArray(className) ? className : [className]
+
+    // Set up
+    classNames.forEach(addBodyClass)
+
+    // Clean up
+    return () => {
+      classNames.forEach(removeBodyClass)
+    }
+  }, [className])
+}
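+
+// Example usage (illustrative only):
+//   useBodyClass('homepage')                 // add a single class while mounted
+//   useBodyClass(['dark-nav', 'no-scroll'])  // or several at once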
diff --git a/docs/src/hooks/useClickOutside.ts b/docs/src/hooks/useClickOutside.ts
new file mode 100644
index 0000000000..2d0e1e3323
--- /dev/null
+++ b/docs/src/hooks/useClickOutside.ts
@@ -0,0 +1,41 @@
+import { useEffect, useRef } from 'react'
+
+const DEFAULT_EVENTS = ['mousedown', 'touchstart']
+
+export function useClickOutside(
+ handler: () => void,
+ events?: string[] | null,
+ nodes?: (HTMLElement | null)[]
+) {
+ const ref = useRef<HTMLElement>()
+
+ useEffect(() => {
+ const listener = (event: any) => {
+ const { target } = event ?? {}
+ if (Array.isArray(nodes)) {
+ const shouldIgnore =
+ target?.hasAttribute('data-ignore-outside-clicks') ||
+ (!document.body.contains(target) && target.tagName !== 'HTML')
+ const shouldTrigger = nodes.every(
+ (node) => !!node && !event.composedPath().includes(node)
+ )
+ shouldTrigger && !shouldIgnore && handler()
+ } else if (ref.current && !ref.current.contains(target)) {
+ handler()
+ }
+ }
+
+ ;(events || DEFAULT_EVENTS).forEach((fn) =>
+ document.addEventListener(fn, listener)
+ )
+
+ return () => {
+ ;(events || DEFAULT_EVENTS).forEach((fn) =>
+ document.removeEventListener(fn, listener)
+ )
+ }
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [ref, handler, nodes])
+
+ return ref
+}
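+
+// Example usage (illustrative only): close a dropdown when a click lands outside it.
+//   const ref = useClickOutside(() => setOpened(false))
+//   ...then pass `ref` to the dropdown's root element.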
diff --git a/docs/src/hooks/useDiscordWidget.ts b/docs/src/hooks/useDiscordWidget.ts
new file mode 100644
index 0000000000..1c47d7e8e0
--- /dev/null
+++ b/docs/src/hooks/useDiscordWidget.ts
@@ -0,0 +1,30 @@
+import { useEffect, useState } from 'react'
+
+import axios, { isAxiosError } from 'axios'
+
+export const useDiscordWidget = () => {
+ const [data, setData] = useState<{ presence_count: number }>({
+ presence_count: 0,
+ })
+
+ useEffect(() => {
+ const updateData = async () => {
+ try {
+ const { data } = await axios.get<{ presence_count: number }>(
+ 'https://discord.com/api/guilds/1107178041848909847/widget.json'
+ )
+ setData({
+ ...data,
+ })
+ } catch (error) {
+ if (isAxiosError(error)) {
+ console.error('Failed to get discord widget:', error)
+ }
+ }
+ }
+ updateData()
+ }, [])
+
+ return { data }
+}
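+
+// Example usage (illustrative only):
+//   const { data } = useDiscordWidget()
+//   // data.presence_count is the number of members currently online.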
diff --git a/docs/src/hooks/useIsomorphicLayoutEffect.ts b/docs/src/hooks/useIsomorphicLayoutEffect.ts
new file mode 100644
index 0000000000..49ca31f398
--- /dev/null
+++ b/docs/src/hooks/useIsomorphicLayoutEffect.ts
@@ -0,0 +1,4 @@
+import { useEffect, useLayoutEffect } from 'react'
+
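+// useLayoutEffect logs a warning when invoked during server-side rendering,
+// where no layout phase exists, so fall back to useEffect on the server.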
+export const useIsomorphicLayoutEffect =
+ typeof window !== 'undefined' ? useLayoutEffect : useEffect
diff --git a/docs/src/pages/_app.mdx b/docs/src/pages/_app.mdx
new file mode 100644
index 0000000000..8fa4b48755
--- /dev/null
+++ b/docs/src/pages/_app.mdx
@@ -0,0 +1,32 @@
+import '@/styles/main.scss'
+import '@code-hike/mdx/styles.css'
+import { Fragment } from "react"
+import Script from "next/script"
+
+
+export default function App({ Component, pageProps }) {
+ return (
+
+
+
+
+
+ )
+}
+
diff --git a/docs/src/pages/_document.tsx b/docs/src/pages/_document.tsx
new file mode 100644
index 0000000000..c939b718c2
--- /dev/null
+++ b/docs/src/pages/_document.tsx
@@ -0,0 +1,13 @@
+import { Html, Head, Main, NextScript } from 'next/document'
+
+export default function Document() {
+ return (
+    <Html lang="en">
+      <Head />
+      <body>
+        <Main />
+        <NextScript />
+      </body>
+    </Html>
+ )
+}
diff --git a/docs/src/pages/_meta.json b/docs/src/pages/_meta.json
new file mode 100644
index 0000000000..c83c40ec62
--- /dev/null
+++ b/docs/src/pages/_meta.json
@@ -0,0 +1,62 @@
+{
+ "index": {
+ "type": "page",
+ "title": "Homepage",
+ "display": "hidden",
+ "theme": {
+ "layout": "raw"
+ }
+ },
+ "docs": {
+ "type": "page",
+ "title": "Documentation"
+ },
+ "cortex": {
+ "type": "page",
+ "title": "Cortex",
+ "display": "hidden"
+ },
+ "integrations": {
+ "type": "page",
+ "title": "Integrations",
+ "display": "hidden"
+ },
+ "changelog": {
+ "type": "page",
+ "title": "Changelog",
+ "theme": {
+ "layout": "raw"
+ }
+ },
+ "about": {
+ "type": "page",
+ "title": "About"
+ },
+ "blog": {
+ "type": "page",
+ "title": "Blog",
+ "theme": {
+ "layout": "raw"
+ }
+ },
+ "post": {
+ "type": "page",
+ "title": "Post Categories",
+ "display": "hidden"
+ },
+
+ "download": {
+ "type": "page",
+ "theme": {
+ "layout": "raw"
+ }
+ },
+ "privacy": {
+ "title": "Privacy",
+ "display": "hidden"
+ },
+ "support": {
+ "title": "Support",
+ "display": "hidden"
+ }
+}
diff --git a/docs/src/pages/about/_assets/eniac.jpeg b/docs/src/pages/about/_assets/eniac.jpeg
new file mode 100644
index 0000000000..6facc4d04f
Binary files /dev/null and b/docs/src/pages/about/_assets/eniac.jpeg differ
diff --git a/docs/src/pages/about/_assets/solar-punk.webp b/docs/src/pages/about/_assets/solar-punk.webp
new file mode 100644
index 0000000000..20829fea49
Binary files /dev/null and b/docs/src/pages/about/_assets/solar-punk.webp differ
diff --git a/docs/src/pages/about/_assets/solarpunk.jpeg b/docs/src/pages/about/_assets/solarpunk.jpeg
new file mode 100644
index 0000000000..f00d7d43da
Binary files /dev/null and b/docs/src/pages/about/_assets/solarpunk.jpeg differ
diff --git a/docs/src/pages/about/_assets/star-wars-droids.png b/docs/src/pages/about/_assets/star-wars-droids.png
new file mode 100644
index 0000000000..a8dffa4c75
Binary files /dev/null and b/docs/src/pages/about/_assets/star-wars-droids.png differ
diff --git a/docs/src/pages/about/_assets/vision-1.webp b/docs/src/pages/about/_assets/vision-1.webp
new file mode 100644
index 0000000000..66e41b5433
Binary files /dev/null and b/docs/src/pages/about/_assets/vision-1.webp differ
diff --git a/docs/src/pages/about/_meta.json b/docs/src/pages/about/_meta.json
new file mode 100644
index 0000000000..5acc0955aa
--- /dev/null
+++ b/docs/src/pages/about/_meta.json
@@ -0,0 +1,29 @@
+{
+ "about-separator": {
+ "title": "About Us",
+ "type": "separator"
+ },
+ "index": "About",
+ "vision": {
+ "title": "Vision",
+ "display": "hidden"
+ },
+ "team": "Team",
+ "investors": "Investors",
+ "wall-of-love": {
+ "theme": {
+ "toc": false,
+ "layout": "full"
+ }
+ },
+ "acknowledgements": {
+ "display": "hidden"
+ },
+ "handbook-separator": {
+ "title": "Handbook",
+ "display": "hidden"
+ },
+ "handbook": {
+ "display": "hidden"
+ }
+}
diff --git a/docs/src/pages/about/handbook.mdx b/docs/src/pages/about/handbook.mdx
new file mode 100644
index 0000000000..fc2775364d
--- /dev/null
+++ b/docs/src/pages/about/handbook.mdx
@@ -0,0 +1,44 @@
+---
+title: Handbook
+description: How we work at Jan
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ build in public,
+ remote team,
+ how we work,
+ ]
+---
+
+# How We Work
+
+Jan operates on open-source principles, giving everyone the freedom to adjust, personalize, and contribute to its development. Our focus is on creating a community-powered ecosystem that prioritizes transparency, customization, and user privacy. For more on our principles, visit our [About page](https://jan.ai/about).
+
+## Open-Source
+
+We embrace open development, showcasing our progress and upcoming features on GitHub, and we encourage your input and contributions:
+
+- [Jan Framework](https://github.com/janhq/jan) (AGPLv3)
+- [Jan Desktop Client & Local server](https://jan.ai) (AGPLv3, built on Jan Framework)
+- [Nitro: run Local AI](https://github.com/janhq/nitro) (AGPLv3)
+
+## Build in Public
+
+We use GitHub to build in public and welcome anyone to join in.
+
+- [Jan's Kanban](https://github.com/orgs/janhq/projects/5)
+- [Jan's Roadmap](https://github.com/orgs/janhq/projects/5/views/29)
+
+## Collaboration
+
+Our team spans the globe, working remotely to bring Jan to life. We coordinate through Discord and GitHub, valuing asynchronous communication and minimal, purposeful meetings. For collaboration and brainstorming, we utilize tools like [Excalidraw](https://excalidraw.com/) and [Miro](https://miro.com/), ensuring alignment and shared vision through visual storytelling and detailed documentation on [HackMD](https://hackmd.io/).
+
+Check out the [Jan Framework](https://github.com/janhq/jan) and our desktop client & local server at [jan.ai](https://jan.ai), both licensed under AGPLv3 for maximum openness and user freedom.
diff --git a/docs/src/pages/about/handbook/_meta.json b/docs/src/pages/about/handbook/_meta.json
new file mode 100644
index 0000000000..8b72b18921
--- /dev/null
+++ b/docs/src/pages/about/handbook/_meta.json
@@ -0,0 +1,21 @@
+{
+ "strategy": {
+ "display": "hidden"
+ },
+ "project-management": {
+ "display": "hidden"
+ },
+ "engineering": {
+ "display": "hidden"
+ },
+ "product-design": {
+ "display": "hidden"
+ },
+ "analytics": {
+ "display": "hidden"
+ },
+ "website-docs": {
+ "title": "Website & Docs",
+ "display": "hidden"
+ }
+}
diff --git a/docs/src/pages/about/handbook/analytics.mdx b/docs/src/pages/about/handbook/analytics.mdx
new file mode 100644
index 0000000000..9e7833e328
--- /dev/null
+++ b/docs/src/pages/about/handbook/analytics.mdx
@@ -0,0 +1,28 @@
+---
+title: Analytics
+description: Jan's Analytics philosophy and implementation
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ analytics,
+ ]
+---
+
+# Analytics
+
+Adhering to Jan's privacy-preserving philosophy, our approach to analytics is "barely-enough-to-function".
+
+## What is tracked
+
+1. By default, GitHub tracks downloads and device metadata for all public GitHub repositories. This helps us troubleshoot & ensure cross-platform support.
+2. We use [Umami](https://umami.is/) to collect, analyze, and understand application data while maintaining visitor privacy and data ownership. We are using the Umami Cloud in Europe to ensure GDPR compliance. Please see [Umami Privacy Policy](https://umami.is/privacy) for more details.
+3. We use Umami to track a single `app.opened` event without additional user metadata, in order to understand retention. In addition, we track `app.version` to understand app version usage.
+4. Additionally, we plan to enable a `Settings` feature for users to turn off all tracking.
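+
+To illustrate point 3, here is a minimal sketch of how such an event could be reported with Umami's browser tracker (assuming the standard `umami.track` API is available on the page; the actual call site in Jan may differ):
+
+```typescript
+// Report a single app-open event plus the app version, with no
+// user-identifying metadata attached.
+declare const umami: {
+  track: (event: string, data?: Record<string, string>) => void
+}
+
+export function reportAppOpened(appVersion: string) {
+  umami.track('app.opened')
+  umami.track('app.version', { version: appVersion })
+}
+```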
diff --git a/docs/src/pages/about/handbook/engineering.mdx b/docs/src/pages/about/handbook/engineering.mdx
new file mode 100644
index 0000000000..dcfa2a32e6
--- /dev/null
+++ b/docs/src/pages/about/handbook/engineering.mdx
@@ -0,0 +1,23 @@
+---
+title: Engineering
+description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ ]
+---
+
+# Engineering
+
+## Prerequisites
+
+- [Requirements](https://github.com/janhq/jan?tab=readme-ov-file#requirements-for-running-jan)
+- [Setting up local env](https://github.com/janhq/jan?tab=readme-ov-file#contributing)
diff --git a/docs/src/pages/about/handbook/engineering/_meta.json b/docs/src/pages/about/handbook/engineering/_meta.json
new file mode 100644
index 0000000000..06699fe566
--- /dev/null
+++ b/docs/src/pages/about/handbook/engineering/_meta.json
@@ -0,0 +1,4 @@
+{
+ "ci-cd": "CI & CD",
+ "qa": "QA"
+}
diff --git a/docs/src/pages/about/handbook/engineering/ci-cd.mdx b/docs/src/pages/about/handbook/engineering/ci-cd.mdx
new file mode 100644
index 0000000000..44d389b85f
--- /dev/null
+++ b/docs/src/pages/about/handbook/engineering/ci-cd.mdx
@@ -0,0 +1,11 @@
+---
+title: CI & CD
+---
+
+import { Callout } from 'nextra/components'
+
+# CI & CD
+
+Previously we were trunk-based. Now we use the following Gitflow:
+
+TODO: @van to include her Mermaid diagram
diff --git a/docs/src/pages/about/handbook/engineering/qa.mdx b/docs/src/pages/about/handbook/engineering/qa.mdx
new file mode 100644
index 0000000000..2def2a4f5c
--- /dev/null
+++ b/docs/src/pages/about/handbook/engineering/qa.mdx
@@ -0,0 +1,82 @@
+---
+title: QA
+description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ ]
+---
+
+# QA
+
+## Phase 1: Planning
+
+### Definition of Ready (DoR):
+
+- **Scope Defined:** The features to be implemented are clearly defined and scoped out.
+- **Requirements Gathered:** Gather and document all the necessary requirements for the feature.
+- **Stakeholder Input:** Ensure relevant stakeholders have provided input on the document scope and content.
+
+### Definition of Done (DoD):
+
+- **Document Complete:** All sections of the document are filled out with relevant information.
+- **Reviewed by Stakeholders:** The document has been reviewed and approved by stakeholders.
+- **Ready for Development:** The document is in a state where developers can use it to begin implementation.
+
+## Phase 2: Development
+
+### Definition of Ready (DoR):
+
+- **Task Breakdown:** The development team has broken down tasks based on the document.
+- **Communication Plan:** A plan is in place for communication between developers and writers if clarification is needed during implementation.
+- **Developer Understanding:** Developers have a clear understanding of the document content.
+
+### Definition of Done (DoD):
+
+- **Code Implementation:** The feature is implemented according to the document specifications.
+- **Developer Testing:**
+ - Unit tests and basic integration tests are completed
+ - The developer has also completed self-testing for the feature (please add this as a comment in the ticket, with the tested OS and as much info as possible, to reduce overlapping effort).
+ - (AC -> Code Changes -> Impacted scenarios)
+- **Communication with Writers:** Developers have communicated any changes or challenges to the writers, and the necessary adjustments are made in the document. (This can be a note in the feature PR for writers to pick up, or a separate PR with your proposed docs changes for writers to review.)
+
+## Phase 3: QA for feature
+
+### Definition of Ready (DoR):
+
+- **Test Note Defined:** The test note is prepared outlining the testing items.
+- **Environment Ready:** PR merged to nightly build, Nightly build notes updated (automatically from pipeline after merged).
+- **Status:** Ticket moved to the Testing column and assigned to QA/writers for review.
+- **Test Data Prepared:** Relevant test data is prepared for testing the scenarios.
+
+### Definition of Done (DoD):
+
+- **Test Executed:** All identified test items are executed on different OS, along with exploratory testing.
+- **Defects Logged:** Any defects found during testing are resolved or appropriately logged (and approved for a future fix).
+- **Test Sign-Off:** QA team provides sign-off indicating the completion of testing.
+
+## Phase 4: Release (DoR)
+
+- **Pre-release wait time:** Code changes to the pre-release version should be frozen for at least X (hrs/days) for regression-testing purposes.
+ - Pre-release is cut on Thursday morning for the team to regression test.
+ - Release to production (Stable) happens during working hours on Monday morning (if there are no blockers) or Tuesday morning.
+ - During the release cutoff, the nightly build is paused to leave room for the pre-release build. The build version used for regression testing will be announced.
+- **Pre-release testing:** A review of the implemented feature has been conducted, along with a regression test (checklist) by the team.
+ - Release checklist cloned from the template for each OS (with a HackMD link).
+ - New key test items from the new feature added to the checklist.
+ - The 3 OSes are split across different team members for testing.
+- **Document Updated:** The document is updated based on the review and feedback, covering any discrepancies or modifications needed for this release.
+- **Reviewed by Stakeholders:** The new feature and the updated document are reviewed and approved by stakeholders. The document is in its final version, reflecting the implemented feature accurately.
+
+## Notes (WIP)
+
+- **API collection run:** runs alongside the nightly build daily, for critical API validation.
+- **Automation run:** for regression-testing purposes, to reduce the manual testing effort on the same items for each release across multiple OSes.
diff --git a/docs/src/pages/about/handbook/product-design.mdx b/docs/src/pages/about/handbook/product-design.mdx
new file mode 100644
index 0000000000..c0d0c10fa3
--- /dev/null
+++ b/docs/src/pages/about/handbook/product-design.mdx
@@ -0,0 +1,27 @@
+---
+title: Product & Design
+description: How we work on product design
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ product design,
+ ]
+---
+
+# Product & Design
+
+## Roadmap
+
+- Conversations over Tickets
+ - Discord's #roadmap channel
+ - Work with the community to turn conversations into Product Specs
+- Future System?
+ - Use Canny?
diff --git a/docs/src/pages/about/handbook/project-management.mdx b/docs/src/pages/about/handbook/project-management.mdx
new file mode 100644
index 0000000000..aa43b1ddd9
--- /dev/null
+++ b/docs/src/pages/about/handbook/project-management.mdx
@@ -0,0 +1,83 @@
+---
+title: Project Management
+description: Project management at Jan
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ project management,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+
+# Project Management
+
+We use the [Jan Monorepo Project](https://github.com/orgs/janhq/projects/5) on GitHub to manage our roadmap and sprint Kanbans.
+
+As much as possible, everyone owns their respective `epics` and `tasks`.
+
+
+ We aim for a `loosely coupled, but tightly aligned` autonomous culture.
+
+
+## Quicklinks
+
+- [High-level roadmap](https://github.com/orgs/janhq/projects/5/views/16): view used at a strategic level, for team-wide alignment. Start & end dates reflect engineering implementation cycles. Product & design work typically precedes these timelines.
+- [Standup Kanban](https://github.com/orgs/janhq/projects/5/views/25): view used during daily standup. Sprints should be up to date.
+
+## Organization
+
+[`Roadmap Labels`](https://github.com/janhq/jan/labels?q=roadmap)
+
+- `Roadmap Labels` tag large, long-term, & strategic projects that can span multiple teams and multiple sprints
+- Example label: `roadmap: Jan has Mobile`
+- `Roadmaps` contain `epics`
+
+[`Epics`](https://github.com/janhq/jan/issues?q=is%3Aissue+is%3Aopen+label%3A%22type%3A+epic%22)
+
+- `Epics` track large stories that span 1-2 weeks and outline specs, architecture decisions, and designs
+- `Epics` contain `tasks`
+- `Epics` should always have 1 owner
+
+[`Milestones`](https://github.com/janhq/jan/milestones)
+
+- `Milestones` track release versions. We use [semantic versioning](https://semver.org/)
+- `Milestones` span ~2 weeks and have deadlines
+- `Milestones` usually fit within 2-week sprint cycles
+
+[`Tasks`](https://github.com/janhq/jan/issues)
+
+- Tasks are individual issues (feats, bugs, chores) that can be completed within a few days
+- Tasks, except for critical bugs, should always belong to an `epic` (and thus fit into our roadmap)
+- Tasks are usually named per [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/#summary)
+- Tasks should always have 1 owner
+
+We aim to always sprint on `tasks` that are a part of the [current roadmap](https://github.com/orgs/janhq/projects/5/views/16).
+
+## Kanban
+
+- `no status`: issues that need to be triaged (needs an owner, ETA)
+- `icebox`: issues you don't plan to tackle yet
+- `planned`: issues you plan to tackle this week
+- `in-progress`: in progress
+- `in-review`: pending PR or blocked by something
+- `done`: done
+
+## Triage SOP
+
+- `Urgent bugs`: assign to an owner (or @engineers if you are not sure) && tag the current `sprint` & `milestone`
+- `All else`: assign the correct roadmap `label(s)` and owner (if any)
+
+### Request for help
+
+As a result, our feature prioritization can sometimes feel like a black box from the outside.
+
+We'd appreciate high quality insights and volunteers for user interviews through [Discord](https://discord.gg/af6SaTdzpx) and [Github](https://github.com/janhq).
diff --git a/docs/src/pages/about/handbook/strategy.mdx b/docs/src/pages/about/handbook/strategy.mdx
new file mode 100644
index 0000000000..f2ce623875
--- /dev/null
+++ b/docs/src/pages/about/handbook/strategy.mdx
@@ -0,0 +1,51 @@
+# Strategy
+
+We only have 3 planning parameters:
+
+- 10 year vision
+- 2 week sprint
+- Quarterly OKRs
+
+## Ideal Customer
+
+Our ideal customer is an AI enthusiast or business that has run into the limitations of current AI solutions and is keen to find open-source alternatives.
+
+## Problems
+
+Our ideal customer would use Jan to solve one of these problems.
+
+_Control_
+
+- Control (e.g. preventing vendor lock-in)
+- Stability (e.g. runs predictably every time)
+- Local-use (e.g. for speed, or for airgapped environments)
+
+_Privacy_
+
+- Data protection (e.g. personal data or company data)
+- Privacy (e.g. NSFW content)
+
+_Customisability_
+
+- Tinkerability (e.g. ability to change model, experiment)
+- Niche Models (e.g. fine-tuned, domain-specific models that outperform OpenAI)
+
+Sources: [^1] [^2] [^3] [^4]
+
+[^1]: [What are you guys doing that can't be done with ChatGPT?](https://www.reddit.com/r/LocalLLaMA/comments/17mghqr/comment/k7ksti6/?utm_source=share&utm_medium=web2x&context=3)
+[^2]: [What's your main interest in running a local LLM instead of an existing API?](https://www.reddit.com/r/LocalLLaMA/comments/1718a9o/whats_your_main_interest_in_running_a_local_llm/)
+[^3]: [Ask HN: What's the best self-hosted/local alternative to GPT-4?](https://news.ycombinator.com/item?id=36138224)
+[^4]: [LoRAs](https://www.reddit.com/r/LocalLLaMA/comments/17mghqr/comment/k7mdz1i/?utm_source=share&utm_medium=web2x&context=3)
+
+## Solution
+
+Jan is a seamless user experience that runs on your personal computer, gluing together the different pieces of the open-source AI ecosystem to provide an alternative to OpenAI's closed platform.
+
+- We build a comprehensive, seamless platform that takes care of the technical chores across the stack required to run open source AI
+- We run on top of a local folder of non-proprietary files that anyone can tinker with (yes, even other apps!)
+- We provide open formats for packaging and distributing AI to run reproducibly across devices
+
+## Prerequisites
+
+- [Figma](https://figma.com)
+- [ScreenStudio](https://www.screen.studio/)
diff --git a/docs/src/pages/about/handbook/website-docs.mdx b/docs/src/pages/about/handbook/website-docs.mdx
new file mode 100644
index 0000000000..9ab2c2e103
--- /dev/null
+++ b/docs/src/pages/about/handbook/website-docs.mdx
@@ -0,0 +1,89 @@
+---
+title: Website & Docs
+description: Information about the Jan website and documentation.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ website,
+ documentation,
+ ]
+---
+
+# Website & Docs
+
+This website is built using [Nextra](https://nextra.site/), a Next.js-based static site generator.
+
+## Information Architecture
+
+We try to **keep routes consistent** to maintain SEO.
+
+- **`/guides/`**: Guides on how to use the Jan application. For end users who are directly using Jan.
+
+- **`/developer/`**: Developer docs on how to extend Jan. These pages are about what people can build with our software.
+
+- **`/api-reference/`**: Reference documentation for the Jan API server, written in Swagger/OpenAPI format.
+
+- **`/changelog/`**: A list of changes made to the Jan application with each release.
+
+- **`/blog/`**: A blog for the Jan application.
+
+## How to Contribute
+
+Refer to the [Contributing Guide](https://github.com/janhq/jan/blob/dev/CONTRIBUTING.md) for more comprehensive information on how to contribute to the Jan project.
+
+## Pre-requisites and Installation
+
+- [Node.js](https://nodejs.org/en/) (version 20.0.0 or higher)
+- [yarn](https://yarnpkg.com/) (version 1.22.0 or higher)
+
+### Installation
+
+```bash
+cd jan/docs
+```
+
+```bash
+yarn install && yarn start
+```
+
+This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
+
+### Build
+
+```bash
+yarn build
+```
+
+This command generates static content into the `build` directory and can be served using any static contents hosting service.
+
+### Deployment
+
+Using SSH:
+
+```bash
+USE_SSH=true yarn deploy
+```
+
+Not using SSH:
+
+```bash
+GIT_USER= yarn deploy
+```
+
+If you are using GitHub Pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
+
+### Preview URL, Pre-release and Publishing Documentation
+
+- When a pull request is created, the preview URL will be automatically commented on the pull request.
+
+- The documentation will then be published to [https://dev.jan.ai/](https://dev.jan.ai/) when the pull request is merged to `main`.
+
+- Our open-source maintainers will sync the updated content from `main` to the `release` branch, which will then be published to [https://jan.ai/](https://jan.ai/).
diff --git a/docs/src/pages/about/index.mdx b/docs/src/pages/about/index.mdx
new file mode 100644
index 0000000000..c50093e534
--- /dev/null
+++ b/docs/src/pages/about/index.mdx
@@ -0,0 +1,105 @@
+---
+title: Homebrew Computer Company
+description: We are Homebrew Computer Company, the creators and maintainers of Jan, Cortex and other tools.
+keywords:
+ [
+ Homebrew Computer Company,
+ Jan,
+ local AI,
+ open-source alternative to chatgpt,
+ alternative to openai platform,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ about Jan,
+ desktop application,
+ thinking machines,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+
+# Homebrew Computer Company
+
+![Eniac](./_assets/eniac.jpeg)
+_[Eniac](https://www.computerhistory.org/revolution/birth-of-the-computer/4/78), the World's First Computer (Photo courtesy of US Army)_
+
+## About
+
+We're a team of AI researchers and engineers. We are the creators and lead maintainers of a few open-source AI tools:
+
+- 👋 [Jan](https://jan.ai): ChatGPT-alternative that runs 100% offline
+- 🤖 [Cortex](https://cortex.so/docs/): A simple, embeddable library to run LLMs locally
+- More to come!
+
+
+The [Homebrew Computer Club](https://en.wikipedia.org/wiki/Homebrew_Computer_Club) was an early computer hobbyist group from 1975 to 1986 that led to Apple and the personal computer revolution.
+
+
+### Mission
+
+We're a robotics company that focuses on the cognitive framework for future robots. Our long-term mission is to advance human-machine collaboration to enable human civilization to thrive.
+
+### Business Model
+
+We're currently a bootstrapped startup [^2]. We balance technical invention with the search for a sustainable business model (e.g., consulting, paid support, and custom development).
+
+
+We welcome business inquiries: 👋 hello@jan.ai
+
+
+### Community
+
+We have a thriving community built around [Jan](../docs), where we also discuss our other projects.
+
+- [Discord](https://discord.gg/AAGQNpJQtH)
+- [Twitter](https://twitter.com/jandotai)
+- [LinkedIn](https://www.linkedin.com/company/homebrewltd)
+- [HuggingFace](https://huggingface.co/janhq)
+- Email: hello@jan.ai
+
+## Philosophy
+
+Homebrew is an opinionated company with a clear philosophy for the products we build:
+
+### 🔑 User Owned
+
+We build tools that are user-owned. Our products are [open-source](https://en.wikipedia.org/wiki/Open_source), designed to run offline or be [self-hosted](https://www.reddit.com/r/selfhosted/). We make no attempt to lock you in, and our tools are free of [user-hostile dark patterns](https://twitter.com/karpathy/status/1761467904737067456?t=yGoUuKC9LsNGJxSAKv3Ubg) [^1].
+
+We adopt [Local-first](https://www.inkandswitch.com/local-first/) principles and store data locally in [universal file formats](https://stephango.com/file-over-app). We build for privacy by default, and we do not [collect or sell your data](/privacy).
+
+### 🔧 Right to Tinker
+
+We believe in the [Right to Repair](https://en.wikipedia.org/wiki/Right_to_repair). We encourage our users to take it further by [tinkering, extending, and customizing](https://www.popularmechanics.com/technology/gadgets/a4395/pm-remembers-steve-jobs-how-his-philosophy-changed-technology-6507117/) our products to fit their needs.
+
+Our products are designed with [Extension APIs](/docs/extensions), and we do our best to write good [documentation](/docs) so users understand how things work under the hood.
+
+### 👫 Build with the Community
+
+We are part of a larger open-source community and are committed to being a good jigsaw puzzle piece. We credit and actively contribute to upstream projects.
+
+We adopt a public-by-default approach to [Project Management](https://github.com/orgs/janhq/projects/5), [Roadmaps](https://github.com/orgs/janhq/projects/5/views/31), and Helpdesk for our products.
+
+## Inspirations
+
+> Good artists borrow, great artists steal - Picasso
+
+We are inspired by and actively try to emulate the paths of companies we admire ❤️:
+
+- [Posthog](https://posthog.com/handbook)
+- [Obsidian](https://obsidian.md/)
+- [Discourse](https://www.discourse.org/about)
+- [Gitlab](https://handbook.gitlab.com/handbook/company/history/#2017-gitlab-storytime)
+- [Redhat](https://www.redhat.com/en/about/development-model)
+- [Ghost](https://ghost.org/docs/contributing/)
+- [Lago](https://www.getlago.com/blog/open-source-licensing-and-why-lago-chose-agplv3)
+- [Twenty](https://twenty.com/story)
+
+## Footnotes
+
+[^1]: [Karpathy's Love Letter to Obsidian](https://twitter.com/karpathy/status/1761467904737067456?t=yGoUuKC9LsNGJxSAKv3Ubg)
+
+[^2]: [The Market for AI Companies](https://www.artfintel.com/p/the-market-for-ai-companies) by Finbarr Timbers
diff --git a/docs/src/pages/about/investors.mdx b/docs/src/pages/about/investors.mdx
new file mode 100644
index 0000000000..a240625407
--- /dev/null
+++ b/docs/src/pages/about/investors.mdx
@@ -0,0 +1,18 @@
+---
+title: Investors
+description: Our unique, unconventional approach to distributing ownership
+keywords: [
+ ESOP,
+ Thinking Machines,
+ Jan,
+ Jan.ai,
+ Jan AI,
+ cortex,
+]
+---
+
+# Investors
+
+We are a [bootstrapped company](https://en.wikipedia.org/wiki/Bootstrapping), and don't have any external investors (yet).
+
+We're open to exploring opportunities with strategic partners who want to tackle [our mission](/about#mission) together.
\ No newline at end of file
diff --git a/docs/src/pages/about/team.mdx b/docs/src/pages/about/team.mdx
new file mode 100644
index 0000000000..205ac106e9
--- /dev/null
+++ b/docs/src/pages/about/team.mdx
@@ -0,0 +1,56 @@
+---
+title: Team
+description: Meet the Thinking Machines team.
+keywords:
+ [
+ Thinking Machines,
+ Jan,
+ Cortex,
+ jan AI,
+ Jan AI,
+ jan.ai,
+ cortex,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+# Team
+
+We're a small, fully-remote team, mostly based in Southeast Asia.
+
+We are committed to becoming a global company. You can check our [Careers page](https://homebrew.bamboohr.com/careers) if you'd like to join us on our adventure.
+
+
+Ping us in [Discord](https://discord.gg/AAGQNpJQtH) if you're keen to talk to us!
+
+
+## Core Team
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Board Members
+
+
+
+
+
diff --git a/docs/src/pages/about/vision.mdx b/docs/src/pages/about/vision.mdx
new file mode 100644
index 0000000000..64ba612f85
--- /dev/null
+++ b/docs/src/pages/about/vision.mdx
@@ -0,0 +1,56 @@
+---
+title: Vision - Thinking Machines
+description: We want to continue a legacy of craftsmen making tools that propel humanity forward.
+keywords:
+ [
+ Jan AI,
+ Thinking Machines,
+ Jan,
+ ChatGPT alternative,
+ local AI,
+ private AI,
+ conversational AI,
+ OpenAI platform alternative,
+ no-subscription fee,
+ large language model,
+ about Jan,
+ desktop application,
+ thinking machine,
+ jan vision,
+ ]
+---
+
+# Vision
+
+> "I do not fear computers. I fear the lack of them" - Isaac Asimov
+
+![Solarpunk Civilization](./_assets/solarpunk.jpeg)
+
+- Harmonious symbiosis of humans, nature, and machines
+- Over millennia, humanity has adopted new tools: fire, electricity, computers, and now AI.
+- AI is no different. It is a tool that can propel humanity forward.
+- We reject the apocalyptic narratives around AI.
+- Go beyond the apocalypse narratives of Dune and Terminator, and you will find a kernel of progress
+
+We want to continue a legacy of craftsmen making tools that propel humanity forward.
+
+## Collaborating with Thinking Machines
+
+Our vision is to develop thinking machines that work alongside humans.
+
+We envision a future where AI is safely used as a tool in our daily lives, like fire and electricity. These robots enhance human potential and do not replace our key decision-making. You own your own AI.
+
+![jan ai shapes the future](./_assets/vision-1.webp)
+
+![Solarpunk Civilization](./_assets/solar-punk.webp)
+> We like that Luke can just open up R2-D2 and tinker around. He was not submitting support tickets to a centralized server somewhere in the galaxy.
+
+## Solarpunk, not Dune
+
+Our vision is rooted in an optimistic view of AI's role in humanity's future.
+
+Like the [Solarpunk movement](https://en.wikipedia.org/wiki/Solarpunk), we envision a world where technology and nature coexist harmoniously, supporting a sustainable and flourishing ecosystem.
+
+We focus on AI's positive impacts on our world. From environmental conservation to the democratization of energy, AI has the potential to address some of the most pressing challenges facing our planet.
+
+https://www.yesmagazine.org/environment/2021/01/28/climate-change-sustainable-solarpunk
\ No newline at end of file
diff --git a/docs/src/pages/about/wall-of-love.mdx b/docs/src/pages/about/wall-of-love.mdx
new file mode 100644
index 0000000000..060c2c3b62
--- /dev/null
+++ b/docs/src/pages/about/wall-of-love.mdx
@@ -0,0 +1,23 @@
+---
+title: Wall of Love ❤️
+
+description: Check out what our amazing users are saying about Jan!
+keywords:
+ [
+ Jan,
+ Rethink the Computer,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ wall of love,
+ ]
+---
+
+import WallOfLove from "@/components/Home/WallOfLove"
+
+
+
diff --git a/docs/src/pages/blog.mdx b/docs/src/pages/blog.mdx
new file mode 100644
index 0000000000..b849d32c43
--- /dev/null
+++ b/docs/src/pages/blog.mdx
@@ -0,0 +1,55 @@
+---
+title: Blog
+description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ architecture,
+ ]
+---
+
+import Blog from "@/components/Blog"
+import fs from 'fs'
+import path from 'path'
+import matter from 'gray-matter'
+import { format } from 'date-fns'
+
+
+
+export const getStaticProps = async() => {
+ const blogPosts = await fs.readdirSync(path.join(process.cwd(), 'src/pages/post')).filter((file) => {
+ return path.extname(file).toLowerCase() === ".mdx" && !file.startsWith('index')
+ })
+ const allBlogPosts = []
+
+ for (const item of blogPosts) {
+ const content = fs.readFileSync(path.join(process.cwd(), `src/pages/post/${item}`), "utf8")
+ const frontmatter = matter(content)
+ if(!frontmatter.data.unlisted) {
+ allBlogPosts.push({
+ title: frontmatter.data?.title || null,
+ url: '/post/' + item?.replace(/\.mdx?/, ''),
+ description: frontmatter.data?.description || null,
+ date: String(frontmatter.data?.date) || null,
+ tags: frontmatter.data.tags?.split(', ') || null,
+ author: frontmatter.data?.author || null,
+ categories: frontmatter.data.categories?.split(', ') || null,
+ })
+ }
+  }
+
+  allBlogPosts.sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime())
+
+ return {
+ props: {
+ ssg: allBlogPosts
+ },
+ }
+}
\ No newline at end of file
diff --git a/docs/src/pages/changelog.mdx b/docs/src/pages/changelog.mdx
new file mode 100644
index 0000000000..f3cdb583df
--- /dev/null
+++ b/docs/src/pages/changelog.mdx
@@ -0,0 +1,56 @@
+---
+title: Changelog
+description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ architecture,
+ ]
+---
+
+import Changelog from "@/components/Changelog"
+import fs from 'fs'
+import path from 'path'
+import matter from 'gray-matter'
+import { format } from 'date-fns'
+
+export const getStaticProps = async() => {
+ const getChangelog = await fs.readdirSync(path.join(process.cwd(), 'src/pages/changelog')).filter((file) => {
+ return path.extname(file).toLowerCase() === ".mdx" && !file.startsWith('index')
+ })
+ const changelog = []
+
+ for (const item of getChangelog) {
+ const content = fs.readFileSync(path.join(process.cwd(), `src/pages/changelog/${item}`), "utf8")
+ const frontmatter = matter(content)
+
+ if(!frontmatter.data.unlisted) {
+ changelog.push({
+ url: item.replace('.mdx', ''),
+ title: frontmatter?.data?.title || '',
+ ogImage: frontmatter?.data?.ogImage || null,
+ version: frontmatter?.data?.version || null,
+ description: frontmatter?.data?.description || null,
+ date: String(frontmatter?.data?.date) || null,
+ })
+ }
+  }
+
+  changelog.sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime())
+
+
+ return {
+ props: {
+ ssg: changelog
+ },
+ }
+}
+
+
diff --git a/docs/src/pages/changelog/2023-12-21-faster-inference-across-platform.mdx b/docs/src/pages/changelog/2023-12-21-faster-inference-across-platform.mdx
new file mode 100644
index 0000000000..d13ff1a966
--- /dev/null
+++ b/docs/src/pages/changelog/2023-12-21-faster-inference-across-platform.mdx
@@ -0,0 +1,17 @@
+---
+title: "Faster inference across: Mac, Windows, Linux, and GPUs"
+version: 0.4.3
+description: ""
+date: 2023-12-21
+ogImage: "/assets/images/changelog/Jan_v0.4.3.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- Custom models: `Trinity`, `Pandora` (great for general use).
+- Faster inference across: Mac, Windows, Linux, and GPUs.
+- Connect to remote OpenAI models like GPT4 via API key.
\ No newline at end of file
diff --git a/docs/src/pages/changelog/2024-01-16-settings-options-right-panel.mdx b/docs/src/pages/changelog/2024-01-16-settings-options-right-panel.mdx
new file mode 100644
index 0000000000..202e5008c4
--- /dev/null
+++ b/docs/src/pages/changelog/2024-01-16-settings-options-right-panel.mdx
@@ -0,0 +1,20 @@
+---
+title: "Thread settings options in the right panel"
+version: 0.4.4
+description: ""
+date: 2024-01-16
+ogImage: ""
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- You can now see whether the model is compatible with running on your device.
+- You can switch model mid-threads.
+- More thread settings options in the right panel.
+- CI automation, anti-virus checks.
+- Direct access to Jan's Discord & GitHub from the app for further user support.
+- Fixed major bugs, more stability.
\ No newline at end of file
diff --git a/docs/src/pages/changelog/2024-01-29-local-api-server.mdx b/docs/src/pages/changelog/2024-01-29-local-api-server.mdx
new file mode 100644
index 0000000000..3a00919fbe
--- /dev/null
+++ b/docs/src/pages/changelog/2024-01-29-local-api-server.mdx
@@ -0,0 +1,23 @@
+---
+title: "Local API server"
+version: 0.4.5
+description: ""
+date: 2024-01-29
+ogImage: "/assets/images/changelog/Jan_v0.4.5.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- Local API Server: Experience Jan's dashboard for the local API server to make your data processing smoother and more efficient.
+- HTTP Proxy Support: Now, downloading and connecting are seamless, just like browsing Hugging Face in your browser.
+- Updated Settings Page: Find what you need faster! We've updated the settings page.
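+
+As a rough illustration, a client could call the local server's OpenAI-compatible chat endpoint like this (assuming the default address `http://localhost:1337` and a model id such as `mistral-ins-7b-q4` that you have already downloaded; adjust both to your setup):
+
+```typescript
+// Minimal sketch of a chat completion request against Jan's local API server.
+async function askLocalJan(prompt: string): Promise<string> {
+  const res = await fetch('http://localhost:1337/v1/chat/completions', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      model: 'mistral-ins-7b-q4', // assumed model id; use one you have installed
+      messages: [{ role: 'user', content: prompt }],
+    }),
+  })
+  const data = await res.json()
+  return data.choices[0].message.content
+}
+```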
+
+### Fixes 💫
+
+- Auto Update: Enjoy smoother updates. We've fixed the glitches.
+- Swagger API Page: Full documentation, no more blanks.
+- GPU for Models: Your imported models now fully leverage GPU power.
\ No newline at end of file
diff --git a/docs/src/pages/changelog/2024-02-05-jan-data-folder.mdx b/docs/src/pages/changelog/2024-02-05-jan-data-folder.mdx
new file mode 100644
index 0000000000..5cef9c3540
--- /dev/null
+++ b/docs/src/pages/changelog/2024-02-05-jan-data-folder.mdx
@@ -0,0 +1,22 @@
+---
+title: "Jan Data Folder"
+version: 0.4.6
+description: ""
+date: 2024-02-05
+ogImage: "/assets/images/changelog/jan_product_update_feature.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- `Changing Jan Data Folder`: Now, moving your data across different folders is just a click away, making organization simpler. You can even keep it on an external drive.
+- Factory Settings: You can reset all of Jan's usage data for a fresh start.
+
+### Fixes 💫
+
+- Easily see each thread's last update time, like WhatsApp, keeping you informed.
+- A new loading screen during data migration ensures the app is responsive.
+- Enhanced notifications for clearer feedback on model runs or errors.
diff --git a/docs/src/pages/changelog/2024-02-26-home-servers-with-helm.mdx b/docs/src/pages/changelog/2024-02-26-home-servers-with-helm.mdx
new file mode 100644
index 0000000000..89e973e9e0
--- /dev/null
+++ b/docs/src/pages/changelog/2024-02-26-home-servers-with-helm.mdx
@@ -0,0 +1,24 @@
+---
+title: "Run Jan on your home-servers with Helm"
+version: 0.4.7
+description: ""
+date: 2024-02-26
+ogImage: ""
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- Run Jan on your `home-servers` with `Helm`
+- Use Jan headless or with a Web UI
+- `Intel Arc` & `AMD GPU` support through `Vulkan` & `LlamaCPP`
+
+
+### Features & Fixes 💫
+
+- 48 fixes, refactoring and stability improvements.
+- Conversation threads are auto-summarized & messages are editable.
+- Encountering an error? We've replaced vague alerts with a troubleshooting assistant.
\ No newline at end of file
diff --git a/docs/src/pages/changelog/2024-03-06-ui-revamp-settings.mdx b/docs/src/pages/changelog/2024-03-06-ui-revamp-settings.mdx
new file mode 100644
index 0000000000..e2ba0c6be7
--- /dev/null
+++ b/docs/src/pages/changelog/2024-03-06-ui-revamp-settings.mdx
@@ -0,0 +1,27 @@
+---
+title: "New UI & Codestral Support"
+version: 0.5.0
+description: "Revamped Jan's UI to make it clearer and more user-friendly"
+date: 2024-06-03
+ogImage: "/assets/images/changelog/jan_v0.5.0.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+Revamped Jan's UI to make it clearer and more user-friendly.
+
+- Updated Assistant, Model, and Tools sections
+- Categorized customization options for easier control
+- New settings for models, APIs, and experimental features
+
+## New Model: Codestral
+
+Jan now supports Mistral's new model Codestral. Thanks to [Bartowski](https://huggingface.co/bartowski) for the GGUF model. You can download it from the Jan Hub.
+
+## More GGUF models
+
+More GGUF models can run in Jan - we rebased to llama.cpp b3012. Big thanks to [ggerganov](https://github.com/ggerganov)!
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.0).
diff --git a/docs/src/pages/changelog/2024-03-11-import-models.mdx b/docs/src/pages/changelog/2024-03-11-import-models.mdx
new file mode 100644
index 0000000000..b1321d4991
--- /dev/null
+++ b/docs/src/pages/changelog/2024-03-11-import-models.mdx
@@ -0,0 +1,22 @@
+---
+title: "Import models directly using the UI"
+version: 0.4.8
+description: ""
+date: 2024-03-11
+ogImage: ""
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- Import models directly using the UI
+- A revamped system monitor
+
+### Features & Fixes 💫
+
+- Install Jan using Homebrew on Apple Silicon Macs (thanks to https://github.com/chenrui333 (I'll tag you when I find your Discord handle! :D)).
+- Fixed an HTTP Proxy issue causing download errors.
+- UI Improvements and more.
\ No newline at end of file
diff --git a/docs/src/pages/changelog/2024-03-19-nitro-tensorrt-llm-extension.mdx b/docs/src/pages/changelog/2024-03-19-nitro-tensorrt-llm-extension.mdx
new file mode 100644
index 0000000000..dacd4be75a
--- /dev/null
+++ b/docs/src/pages/changelog/2024-03-19-nitro-tensorrt-llm-extension.mdx
@@ -0,0 +1,18 @@
+---
+title: "Nitro-Tensorrt-LLM Extension"
+version: 0.4.9
+description: ""
+date: 2024-03-19
+ogImage: ""
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- Nitro-Tensorrt-LLM Extension.
+- Update models.json.
+- Move tensorrt executable to the engine.
+
diff --git a/docs/src/pages/changelog/2024-04-02-groq-api-integration.mdx b/docs/src/pages/changelog/2024-04-02-groq-api-integration.mdx
new file mode 100644
index 0000000000..0a35794abf
--- /dev/null
+++ b/docs/src/pages/changelog/2024-04-02-groq-api-integration.mdx
@@ -0,0 +1,22 @@
+---
+title: "Groq API Integration"
+version: 0.4.10
+description: ""
+date: 2024-04-02
+ogImage: "/assets/images/changelog/jan_update_groq.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- Groq API Integration
+- Enhanced hardware troubleshooting guide
+
+### Features & Fixes 💫
+
+- Improved Jan data folder's functionality.
+- Fixed URI malformed and `stop` parameter error.
+- VRAM-aware model recommendations.
\ No newline at end of file
diff --git a/docs/src/pages/changelog/2024-04-15-new-mistral-extension.mdx b/docs/src/pages/changelog/2024-04-15-new-mistral-extension.mdx
new file mode 100644
index 0000000000..3ce6cbf816
--- /dev/null
+++ b/docs/src/pages/changelog/2024-04-15-new-mistral-extension.mdx
@@ -0,0 +1,18 @@
+---
+title: "New Mistral Extension"
+version: 0.4.11
+description: "Jan has a new Mistral Extension letting you chat with larger Mistral models via Mistral API"
+date: 2024-04-15
+ogImage: "/assets/images/changelog/jan_mistral_api.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+### Highlights 🎉
+
+- Jan has a new `Mistral Extension` letting you chat with larger Mistral models via Mistral API. You can still run smaller Mistral models locally, but now there's a remote option.
+- 3rd party extensions can register their own settings pages in the app without any code.
+- You can now set a custom API prefix for the local API server.
+- You can now customize your Assistant's name in Thread Settings.
diff --git a/docs/src/pages/changelog/2024-04-25-llama3-command-r-hugginface.mdx b/docs/src/pages/changelog/2024-04-25-llama3-command-r-hugginface.mdx
new file mode 100644
index 0000000000..8998b9a8a0
--- /dev/null
+++ b/docs/src/pages/changelog/2024-04-25-llama3-command-r-hugginface.mdx
@@ -0,0 +1,31 @@
+---
+title: 'Jan now supports Llama3 and Command R+'
+version: 0.4.12
+description: "Jan has added compatibility with Llama3 & Command R+"
+date: 2024-04-25
+ogImage: "/assets/images/changelog/jan_llama3.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+Jan has added compatibility with Meta’s open-source language model, `Llama3`, through the integration with `llamacpp` (thanks to [@ggerganov](https://github.com/ggerganov)).
+
+Additionally, `Command R+` is now supported. It is the first open-source model to surpass GPT-4 on the [LMSys leaderboard](https://chat.lmsys.org/?leaderboard).
+
+![Commandr](/assets/images/changelog/jan_cohere_commandr.gif)
+
+## Import Hugging Face models directly
+
+Users can now import Hugging Face models into Jan. Simply copy the model's link from Hugging Face and paste it into the search bar in the Jan Hub.
+
+![HugginFace](/assets/images/changelog/jan_hugging_face.gif)
+
+## Enhanced LaTeX understanding
+
+Jan now understands LaTeX, allowing users to process and understand complex mathematical expressions more effectively.
+
+![Latex](/assets/images/changelog/jan_update_latex.gif)
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.4.12).
diff --git a/docs/src/pages/changelog/2024-05-20-llamacpp-upgrade-new-remote-models.mdx b/docs/src/pages/changelog/2024-05-20-llamacpp-upgrade-new-remote-models.mdx
new file mode 100644
index 0000000000..11e8013ac7
--- /dev/null
+++ b/docs/src/pages/changelog/2024-05-20-llamacpp-upgrade-new-remote-models.mdx
@@ -0,0 +1,31 @@
+---
+title: "Jan now supports more GGUF models"
+version: 0.4.13
+description: "We rebased to llamacpp b2865."
+date: 2024-05-20
+ogImage: "/assets/images/changelog/jan_v0.4.13_update.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+With this release, more GGUF models should work now! We rebased to llamacpp b2865!
+
+## New remote models: Anthropic & Cohere APIs
+
+Jan now supports `Anthropic API` models `Claude 3 Opus`, `Claude 3 Sonnet`, and `Claude 3 Haiku`, along with `Cohere`'s `Command R` and `Command R+`.
+
+## New integrations: Martian and OpenRouter
+
+Jan supports `Martian`, a dynamic LLM router that routes between multiple models and allows users to reduce costs by 20% to 97%. Jan also supports `OpenRouter`, helping users select the best model for each query.
+
+![New_Integrations](/assets/images/changelog/jan_v0.4.13_update.gif)
+
+## GPT-4o Access
+
+Users can now connect to OpenAI's new model GPT-4o.
+
+![GPT4o](/assets/images/changelog/jan_v0_4_13_openai_gpt4o.gif)
+
+For more details, see the [GitHub release notes.](https://github.com/janhq/jan/releases/tag/v0.4.13)
diff --git a/docs/src/pages/changelog/2024-05-28-cohere-aya-23-8b-35b-phi-3-medium.mdx b/docs/src/pages/changelog/2024-05-28-cohere-aya-23-8b-35b-phi-3-medium.mdx
new file mode 100644
index 0000000000..3d628afd77
--- /dev/null
+++ b/docs/src/pages/changelog/2024-05-28-cohere-aya-23-8b-35b-phi-3-medium.mdx
@@ -0,0 +1,19 @@
+---
+title: "Jan now compatible with Aya 23 8B & 35B and Phi-3-Medium"
+version: 0.4.14
+description: "Jan now supports Cohere's Aya 23 8B & 35B and Microsoft's Phi-3-Medium."
+date: 2024-05-28
+ogImage: "/assets/images/changelog/jan-v0-4-14-phi3.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+Jan now supports `Cohere`'s new models `Aya 23 (8B)` & `Aya 23 (35B)` and `Microsoft`'s `Phi-3-Medium`.
+
+More GGUF models can run in Jan - we rebased to llama.cpp b2961.
+
+Huge shoutouts to [ggerganov](https://github.com/ggerganov) and contributors for llama.cpp, and [Bartowski](https://huggingface.co/bartowski) for GGUF models.
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.4.14).
diff --git a/docs/src/pages/changelog/2024-06-21-nvidia-nim-support.mdx b/docs/src/pages/changelog/2024-06-21-nvidia-nim-support.mdx
new file mode 100644
index 0000000000..ee7e7282fd
--- /dev/null
+++ b/docs/src/pages/changelog/2024-06-21-nvidia-nim-support.mdx
@@ -0,0 +1,29 @@
+---
+title: "Jan supports NVIDIA NIM"
+version: 0.5.1
+description: "Jan has integrated NVIDIA NIM and supports Qwen 2 7B"
+date: 2024-06-21
+ogImage: "/assets/images/changelog/jan_nvidia_nim_support.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+## NVIDIA NIM
+
+We've integrated NVIDIA NIM support.
+
+## Qwen 2 7B
+
+You can now access Qwen 2 7B directly in the Jan Hub.
+
+We've updated to llama.cpp b3088 for better performance - thanks to [GG](https://github.com/ggerganov)
+
+## Fixes
+
+- Fixed Anthropic API error
+- Reduced chat font weight (back to normal!)
+- Restored the maximize button
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.1).
diff --git a/docs/src/pages/changelog/2024-07-15-claude-3-5-support.mdx b/docs/src/pages/changelog/2024-07-15-claude-3-5-support.mdx
new file mode 100644
index 0000000000..b3d8854e32
--- /dev/null
+++ b/docs/src/pages/changelog/2024-07-15-claude-3-5-support.mdx
@@ -0,0 +1,35 @@
+---
+title: "Jan supports Claude 3.5 Sonnet"
+version: 0.5.2
+description: "You can run Claude 3.5 Sonnet in Jan"
+date: 2024-07-15
+ogImage: "/assets/images/changelog/jan_supports_claude_3_5.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+## Claude 3.5 Sonnet
+
+We've integrated support for Claude 3.5 Sonnet, so you can run Anthropic's latest model in Jan.
+
+Plus, you can now use optional spell check for chats. There's also a new shortcut for app logs in System Monitor.
+
+## Fixes
+
+In this hotfix, we've addressed several issues to improve your Jan experience:
+
+### Gemma 2B Stability
+
+Gemma 2B now runs without any issues.
+
+### Tooltip Hover Functionality
+
+We've restored the tooltip hover functionality, which makes it easier to access helpful information without any glitches.
+
+### Right-click Options for Thread Settings
+
+The right-click options for thread settings are now fully operational again, so you can manage your threads as before.
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.2).
diff --git a/docs/src/pages/changelog/2024-09-01-llama3-1-gemma2-support.mdx b/docs/src/pages/changelog/2024-09-01-llama3-1-gemma2-support.mdx
new file mode 100644
index 0000000000..4f869cd0ce
--- /dev/null
+++ b/docs/src/pages/changelog/2024-09-01-llama3-1-gemma2-support.mdx
@@ -0,0 +1,26 @@
+---
+title: "v0.5.3 is out with stability improvements!"
+version: 0.5.3
+description: "You can run Llama 3.1 and Gemma 2 in Jan"
+date: 2024-08-29
+ogImage: "/assets/images/changelog/janv0.5.3.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+## Llama 3.1 and Gemma 2 Support
+
+Jan finally supports Meta's Llama 3.1 and Google's Gemma 2. Thanks for the patience folks!
+
+We've been working on stability issues over the last few weeks. Jan is now more stable.
+
+### Additional Notes
+
+- Upgraded the inference engine for better performance
+- Model settings now persist across new threads
+- Fixed the GPU memory utilization bar
+- Some UX and copy improvements
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.3).
diff --git a/docs/src/pages/changelog/2024-09-17-improved-cpu-performance.mdx b/docs/src/pages/changelog/2024-09-17-improved-cpu-performance.mdx
new file mode 100644
index 0000000000..9bf0def6ea
--- /dev/null
+++ b/docs/src/pages/changelog/2024-09-17-improved-cpu-performance.mdx
@@ -0,0 +1,35 @@
+---
+title: "Jan is faster now"
+version: 0.5.4
+description: "Jan has faster CPU inference."
+date: 2024-09-17
+ogImage: "/assets/images/changelog/jan-v0.5.4.jpg"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+
+
+Over the last few weeks, we've been working on improving Jan's stability. Every update helps us make Jan's experience faster and smoother.
+With version 0.5.4, you’ll notice AI running quicker on CPU - better performance all around.
+
+### Model Downloads
+You can now download models directly from the model selector in Threads. No more jumping between different tabs – just pick, download, and get started.
+
+### Fast CPU Inference
+We've addressed the slower inference speeds on CPU, so you'll notice faster processing times, especially when using larger models.
+
+### Model Starts
+We tackled the notorious "model can't start / The specified module could not be found" error.
+Plus, Phi-3 models are now working smoothly even if you're using an outdated version.
+
+### Consistent Warnings
+Performance warnings are now aligned between Model Hub and Threads, giving you more reliable insights no matter where you're working.
+
+### Persistent Thread Settings
+Switching between threads used to reset your instruction settings. That’s fixed now! Your settings will stay intact as you jump between old and new threads.
+
+### Minor UI Tweaks & Bug Fixes
+We’ve also resolved issues with the input slider on the right panel and tackled several smaller bugs to keep everything running smoothly.
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.4).
diff --git a/docs/src/pages/changelog/_meta.json b/docs/src/pages/changelog/_meta.json
new file mode 100644
index 0000000000..e23bae0c06
--- /dev/null
+++ b/docs/src/pages/changelog/_meta.json
@@ -0,0 +1,8 @@
+{
+ "*": {
+ "type": "page",
+ "theme": {
+ "layout": "default"
+ }
+ }
+}
diff --git a/docs/src/pages/cortex/_assets/architecture.png b/docs/src/pages/cortex/_assets/architecture.png
new file mode 100644
index 0000000000..a91f1a5482
Binary files /dev/null and b/docs/src/pages/cortex/_assets/architecture.png differ
diff --git a/docs/src/pages/cortex/_assets/cortex-cover.png b/docs/src/pages/cortex/_assets/cortex-cover.png
new file mode 100644
index 0000000000..9ef3816f84
Binary files /dev/null and b/docs/src/pages/cortex/_assets/cortex-cover.png differ
diff --git a/docs/src/pages/cortex/_assets/cortex-llamacpp-act.png b/docs/src/pages/cortex/_assets/cortex-llamacpp-act.png
new file mode 100644
index 0000000000..5cc93b2a24
Binary files /dev/null and b/docs/src/pages/cortex/_assets/cortex-llamacpp-act.png differ
diff --git a/docs/src/pages/cortex/_assets/cortex-llamacpp-arch.png b/docs/src/pages/cortex/_assets/cortex-llamacpp-arch.png
new file mode 100644
index 0000000000..6118d91265
Binary files /dev/null and b/docs/src/pages/cortex/_assets/cortex-llamacpp-arch.png differ
diff --git a/docs/src/pages/cortex/_meta.json b/docs/src/pages/cortex/_meta.json
new file mode 100644
index 0000000000..74a870da58
--- /dev/null
+++ b/docs/src/pages/cortex/_meta.json
@@ -0,0 +1,136 @@
+{
+ "-- Switcher": {
+ "type": "separator",
+ "title": "Switcher"
+ },
+ "get-started": {
+ "title": "GET STARTED",
+ "type": "separator"
+ },
+ "index": {
+ "title": "Overview",
+ "href": "/cortex"
+ },
+ "quickstart": {
+ "title": "Quickstart"
+ },
+ "hardware": {
+ "title": "Hardware"
+ },
+ "installation": {
+ "title": "Installation"
+ },
+ "basicusage": {
+ "title": "BASIC USAGE",
+ "type": "separator"
+ },
+ "command-line": {
+ "title": "CLI"
+ },
+ "ts-library": {
+ "title": "Typescript Library"
+ },
+ "py-library": {
+ "title": "Python Library"
+ },
+ "server": {
+ "title": "Server Endpoint"
+ },
+ "capabilities": {
+ "title": "CAPABILITIES",
+ "type": "separator"
+ },
+ "text-generation": {
+ "title": "Text Generation"
+ },
+ "function-calling": {
+ "display": "hidden",
+ "title": "Function Calling"
+ },
+ "embeddings": {
+ "display": "hidden",
+ "title": "Embeddings"
+ },
+ "fine-tuning": {
+ "display": "hidden",
+ "title": "Fine-tuning"
+ },
+ "vision": {
+ "display": "hidden",
+ "title": "Vision"
+ },
+ "model-operations": {
+ "display": "hidden",
+ "title": "Model Operations"
+ },
+ "rag": {
+ "display": "hidden",
+ "title": "RAG"
+ },
+ "assistant": {
+ "display": "hidden",
+ "title": "ASSISTANTS",
+ "type": "separator"
+ },
+ "assistants": {
+ "display": "hidden",
+ "title": "Overview"
+ },
+ "commandline": {
+ "title": "COMMAND LINE",
+ "type": "separator"
+ },
+ "cli": {
+ "title": "cortex"
+ },
+ "training-engines": {
+ "display": "hidden",
+ "title": "TRAINING ENGINES"
+ },
+ "extensions": {
+ "display": "hidden",
+ "title": "EXTENSIONS",
+ "type": "separator"
+ },
+ "build-extension": {
+ "display": "hidden",
+ "title": "Build an Extension"
+ },
+ "architectures": {
+ "title": "ARCHITECTURE",
+ "type": "separator"
+ },
+ "architecture": {
+ "title": "Cortex"
+ },
+ "cortex-cpp": {
+ "title": "Cortex.cpp"
+ },
+ "cortex-llamacpp": {
+ "title": "Cortex.llamacpp"
+ },
+ "cortex-tensorrt-llm": {
+ "title": "Cortex.tensorrt-llm",
+ "display": "hidden"
+ },
+ "cortex-python": {
+ "title": "Cortex.python",
+ "display": "hidden"
+ },
+ "cortex-openvino": {
+ "title": "Cortex.OpenVino",
+ "display": "hidden"
+ },
+ "ext-architecture": {
+ "display": "hidden",
+ "title": "Extensions"
+ },
+ "troubleshooting": {
+ "title": "TROUBLESHOOTING",
+ "type": "separator"
+ },
+ "error-codes": {
+ "display": "hidden",
+ "title": "Error Codes"
+ }
+}
diff --git a/docs/src/pages/cortex/architecture.mdx b/docs/src/pages/cortex/architecture.mdx
new file mode 100644
index 0000000000..11959ccc30
--- /dev/null
+++ b/docs/src/pages/cortex/architecture.mdx
@@ -0,0 +1,202 @@
+---
+title: Overview
+description: Cortex Architecture
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+## Introduction
+
+Cortex is an alternative to the OpenAI API designed to operate entirely on your local hardware infrastructure. This headless backend platform is also engineered to support TensorRT-LLM, ensuring high-performance machine-learning model execution. It is packaged with a Docker-inspired command-line interface and a TypeScript client library.
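+
+As a quick flavor of that Docker-inspired interface, a typical workflow pulls a model and then runs it (the model name below is illustrative; see the CLI reference for exact syntax):
+
+```bash
+# Download a model, then start it with an interactive chat shell
+cortex pull llama3
+cortex run llama3
+```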
+
+The following guide details Cortex's core components, providing insights and instructions for those interested in customizing it to meet specific requirements.
+
+
+## Architecture
+
+![Architecture](./_assets/architecture.png)
+
+### Main Components
+
+Cortex is architected with several key components, each designed to fulfill specific roles within the system, ensuring efficient processing and response to client requests.
+
+1. **Cortex JS**: This component acts as the interface layer where requests are received and responses are sent.
+2. **Server:** The central processing unit of Cortex, this component coordinates all activities across the system. It manages the data flow and ensures operations are correctly executed.
+3. **Kernel**: This component checks the server's hardware configuration. Based on the current hardware setup, it determines whether additional dependencies are required, optimizing the system for performance and compatibility.
+4. **Runtime**: This process involves dynamically loading necessary libraries and models based on the server's current needs and processing requests.
+5. **Dynamic Libraries**: Consists of inference engines loaded on-demand to enhance Cortex's processing power. These engines are essential for performing specialized computational tasks. Currently, Cortex supports:
+ - Llama.cpp Engine
+ - TensorRT-LLM Engine
+ - Python-runtime Engine
+
+### Data Structure
+
+Cortex is equipped with **MySQL** and **SQLite** databases, offering flexible data management options that can be easily adapted to different environments and requirements. It can also store and retrieve data on the filesystem using file-based mechanisms.
+
+#### MySQL
+
+This database is ideal for Cortex environments where scalability, security, and data integrity are critical. MySQL is well-suited for handling large model-size data from the core extensions.
+
+#### SQLite
+
+This database is used for its simplicity and minimal setup. It handles smaller model data from the core extensions and any data from external extensions.
+
+#### File System
+
+Cortex uses a filesystem approach for managing configuration files, such as `model.yaml` files. These files are stored in a structured directory hierarchy, enabling efficient data retrieval and management.
+
+### Providers
+
+#### Internal Provider
+
+Integral to the CLI, it includes the core binary (**`.cpp`**) and is compiled directly with the CLI, giving all parts of the application direct access to core functionalities.
+
+#### Core Extensions
+
+These are bundled with the CLI and include additional functionalities like remote engines and API models, facilitating more complex operations and interactions within the same architectural framework.
+
+#### External Extensions
+
+These are designed to be more flexible and are stored externally. They represent potential future expansions or integrations, allowing the architecture to extend its capabilities without modifying the core system.
+
+### Key Dependencies
+
+Cortex is developed using NestJS and operates via a Node.js server framework, handling all incoming and outgoing requests. It also has a C++ runtime to handle stateless requests.
+
+Below is a detailed overview of its core architecture components:
+
+#### NestJS Framework
+
+The NestJS framework serves as the backbone of Cortex. It organizes server-side logic into modules, controllers, and extensions, which is important for maintaining a clean codebase and efficient request handling.
+
+#### Node.js Server
+
+Node.js is the primary runtime for Cortex; it handles HTTP requests, executes the server-side logic, and manages responses.
+
+#### C++ Runtime
+
+The C++ runtime manages stateless requests and handles intensive tasks that require optimized performance.
+
+## Code Structure
+
+The repository is organized to separate concerns between domain definitions, business rules, and adapters or implementations.
+```
+# Entity Definitions
+domain/ # This is the core directory where the domains are defined.
+ abstracts/ # Abstract base classes for common attributes and methods.
+ models/ # Domain interface definitions, e.g. model, assistant.
+ repositories/ # Extensions abstract and interface
+
+# Business Rules
+usecases/ # Application logic
+ assistants/ # CRUD logic (invokes dtos, entities).
+ chat/ # Logic for chat functionalities.
+ models/ # Logic for model operations.
+
+# Adapters & Implementations
+infrastructure/ # Implementations for Cortex interactions
+ commanders/ # CLI handlers
+ models/
+ questions/ # CLI installation UX
+ shortcuts/ # CLI chained syntax
+ types/
+ usecases/ # Invokes UseCases
+
+ controllers/ # Nest controllers and HTTP routes
+ assistants/ # Invokes UseCases
+ chat/ # Invokes UseCases
+ models/ # Invokes UseCases
+
+ database/ # Database providers (mysql, sqlite)
+
+ # Framework specific object definitions
+ dtos/ # DTO definitions (data transfer & validation)
+ entities/ # TypeORM entity definitions (db schema)
+
+ # Providers
+ providers/cortex # Cortex [server] provider (a core extension)
+ repositories/extensions # Extension provider (core & external extensions)
+
+extensions/ # External extensions
+command.module.ts # CLI Commands List
+main.ts # Entrypoint
+
+```
+
+The structure above promotes clean architecture principles, allowing for scalable and maintainable Cortex development.
+
+
+## Runtime
+```mermaid
+sequenceDiagram
+ User-)Cortex: "Tell me a joke"
+ Cortex->>HF: Download a model
+ Cortex->>Model Controller/Service: Start the model
+ Cortex->>Chat Controller/Service: POST /completions
+ Chat Controller/Service ->> Chat UseCases: createChatCompletions()
+ Chat UseCases -->> Model Entity: findOne()
+ Cortex->>Model Entity: Store the model data
+ Chat UseCases -->> Extension Repository: findAll()
+ Extension Repository ->> Cortex Provider: inference()
+ CortexCPP Server ->> Cortex Provider: Port /???
+
+ %% Responses
+ Cortex Provider ->> Extension Repository: inference()
+ Extension Repository ->> Chat UseCases: Response stream
+ Chat UseCases ->> Chat Controller/Service: Formatted response/stream
+ Chat Controller/Service ->> User: "Your mama"
+```
+The sequence diagram above outlines the interactions between various components in the Cortex system during runtime, particularly when handling user requests via a CLI. Here’s a detailed breakdown of the runtime sequence:
+
+1. **User Request**: The user initiates an interaction by requesting “a joke” via the Cortex CLI.
+2. **Model Activation**:
+ - The API directs the request to the `Model Controller/Service`.
+ - The service pulls and starts the appropriate model and posts a request to `'/completions'` to prepare the model for processing.
+3. **Chat Processing**:
+ - The `Chat Controller/Service` processes the user's request using `Chat UseCases`.
+ - The `Chat UseCases` interact with the Model Entity and Extension Repository to gather necessary data and logic.
+4. **Data Handling and Response Formation**:
+ - The `Model Entity` and `Extension Repository` perform data operations, which may involve calling a `Provider` for additional processing.
+ - Data is fetched, stored, and an inference is performed as needed.
+5. **Response Delivery**:
+ - The response is formatted by the `Chat UseCases` and streamed back to the user through the API.
+ - The user receives the processed response, completing the cycle of interaction.
+
+## Roadmap
+
+Our development roadmap outlines key features and epics we will focus on in the upcoming releases. These enhancements aim to improve functionality, increase efficiency, and expand Cortex's capabilities.
+
+- **Crash Report Telemetry**: Enhance error reporting and operational stability by automatically collecting and analyzing crash reports.
+- **RAG**: Improve response quality and contextual relevance in our AI models.
+- **Cortex TensorRT-LLM**: Optimize LLM inference with NVIDIA TensorRT-LLM.
+- **Cortex Presets**: Streamline model configurations.
+- **Cortex Python Runtime**: Provide a scalable Python execution environment for Cortex.
+
+## Risks & Technical Debt
+
+Cortex CLI, built with Nest-commander, incorporates extensions to integrate various inference providers. This flexibility, however, introduces certain risks related to dependency management and the objective of bundling the CLI into a single executable binary.
+
+### Key Risks
+
+1. **Complex Dependencies**: Utilizing Nest-commander involves a deep dependency tree, risking version conflicts and complicating updates.
+2. **Bundling Issues**: Converting to a single executable can reveal issues with `npm` dependencies and relative asset paths, leading to potential runtime errors due to unresolved assets or incompatible binary dependencies.
\ No newline at end of file
diff --git a/docs/src/pages/cortex/assistants.mdx b/docs/src/pages/cortex/assistants.mdx
new file mode 100644
index 0000000000..90e29c29a7
--- /dev/null
+++ b/docs/src/pages/cortex/assistants.mdx
@@ -0,0 +1,22 @@
+---
+title: Assistants
+description: Assistants
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/build-extension.mdx b/docs/src/pages/cortex/build-extension.mdx
new file mode 100644
index 0000000000..6c110b7c63
--- /dev/null
+++ b/docs/src/pages/cortex/build-extension.mdx
@@ -0,0 +1,22 @@
+---
+title: Build an Extension
+description: Build an Extension
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli.mdx b/docs/src/pages/cortex/cli.mdx
new file mode 100644
index 0000000000..24a0dd229a
--- /dev/null
+++ b/docs/src/pages/cortex/cli.mdx
@@ -0,0 +1,54 @@
+---
+title: Command Line Interface
+description: Cortex CLI.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# Cortex
+
+Cortex is a CLI tool used to interact with the Jan application and its various functions.
+
+
+Cortex CLI is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex [command] [flag]
+```
+### Options
+```
+ -v, --version Cortex version (default: false)
+ -h, --help display help for command
+```
+## Sub Commands
+- [cortex models](/cortex/cli/models): Manage and configure models.
+- [cortex serve](/cortex/cli/serve): Launch an API endpoint server for the Cortex backend.
+- [cortex chat](/cortex/cli/chat): Send a chat request to a model.
+- [cortex init|setup](/cortex/cli/init): Initialize settings and download dependencies for Cortex.
+- [cortex ps](/cortex/cli/ps): Display active models and their operational status.
+- [cortex kill](/cortex/cli/kill): Terminate active Cortex processes.
+- [cortex pull|download](/cortex/cli/pull): Download a model.
+- [cortex run](/cortex/cli/run): Shortcut to start a model and chat **(EXPERIMENTAL)**.
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/_meta.json b/docs/src/pages/cortex/cli/_meta.json
new file mode 100644
index 0000000000..dc2c73e695
--- /dev/null
+++ b/docs/src/pages/cortex/cli/_meta.json
@@ -0,0 +1,26 @@
+{
+ "init": {
+ "title": "cortex init"
+ },
+ "pull": {
+ "title": "cortex pull"
+ },
+ "run": {
+ "title": "cortex run"
+ },
+ "models": {
+ "title": "cortex models"
+ },
+ "ps": {
+ "title": "cortex ps"
+ },
+ "chat": {
+ "title": "cortex chat"
+ },
+ "kill": {
+ "title": "cortex kill"
+ },
+ "serve": {
+ "title": "cortex serve"
+ }
+}
diff --git a/docs/src/pages/cortex/cli/chat.mdx b/docs/src/pages/cortex/cli/chat.mdx
new file mode 100644
index 0000000000..e4c086b65c
--- /dev/null
+++ b/docs/src/pages/cortex/cli/chat.mdx
@@ -0,0 +1,47 @@
+---
+title: Cortex Chat
+description: Cortex chat command.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex chat`
+
+This command starts a chat session with a specified model, allowing you to interact directly with it through an interactive chat interface.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex chat --model MODEL_ID
+```
+### Options
+```
+ -t, --thread Thread Id. If not provided, will create new thread
+ -m, --message Message to send to the model
+ -a, --attach Attach to interactive chat session (default: false)
+ -h, --help display help for command
+```
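+
+For example, assuming a model has already been downloaded (the model id below is illustrative), you can open an interactive session or send a single message:
+
+```bash
+# Start an interactive chat with a model
+cortex chat --model llama3
+
+# Send a one-off message with the --message flag
+cortex chat --model llama3 -m "What is a GGUF file?"
+```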
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/init.mdx b/docs/src/pages/cortex/cli/init.mdx
new file mode 100644
index 0000000000..7a95016e87
--- /dev/null
+++ b/docs/src/pages/cortex/cli/init.mdx
@@ -0,0 +1,49 @@
+---
+title: Cortex Models Init
+description: Cortex init command.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex init`
+
+This command initializes the Cortex settings and downloads the dependencies required to run Cortex.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Alias
+The following alias is also available for initializing cortex:
+- `cortex setup`
+
+## Usage
+
+```bash
+cortex init
+```
+
+## Options
+```
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/kill.mdx b/docs/src/pages/cortex/cli/kill.mdx
new file mode 100644
index 0000000000..eba4927dd1
--- /dev/null
+++ b/docs/src/pages/cortex/cli/kill.mdx
@@ -0,0 +1,45 @@
+---
+title: Cortex Kill
+description: Cortex kill command.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex kill`
+
+This command stops the currently running cortex processes.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex kill
+```
+
+## Options
+```
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/models.mdx b/docs/src/pages/cortex/cli/models.mdx
new file mode 100644
index 0000000000..36ffa69a14
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models.mdx
@@ -0,0 +1,52 @@
+---
+title: Cortex Models
+description: Cortex CLI.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models`
+
+This command allows you to start, stop, and manage various model operations within Cortex.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models API_COMMAND [OPTIONS]
+
+# Start a downloaded model
+cortex models start MODEL_ID
+
+# Stop a downloaded model
+cortex models stop MODEL_ID
+```
+
+## Options
+
+```
+ -h, --help display help for command
+```
diff --git a/docs/src/pages/cortex/cli/models/_meta.json b/docs/src/pages/cortex/cli/models/_meta.json
new file mode 100644
index 0000000000..592f89ac4e
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/_meta.json
@@ -0,0 +1,23 @@
+{
+ "download": {
+ "title": "cortex models pull"
+ },
+ "list": {
+ "title": "cortex models list"
+ },
+ "get": {
+ "title": "cortex models get"
+ },
+ "update": {
+ "title": "cortex models update"
+ },
+ "start": {
+ "title": "cortex models start"
+ },
+ "stop": {
+ "title": "cortex models stop"
+ },
+ "remove": {
+ "title": "cortex models remove"
+ }
+}
diff --git a/docs/src/pages/cortex/cli/models/download.mdx b/docs/src/pages/cortex/cli/models/download.mdx
new file mode 100644
index 0000000000..0adb63cb6c
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/download.mdx
@@ -0,0 +1,49 @@
+---
+title: Cortex Models Pull
+description: Cortex models subcommands.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models pull`
+
+This command downloads a model. You can use a Hugging Face `MODEL_ID` to specify which model to download.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models pull MODEL_ID
+```
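+
+For instance, to fetch a GGUF build directly from Hugging Face (repository name shown for illustration, mirroring the `cortex pull` examples):
+
+```bash
+cortex models pull microsoft/Phi-3-mini-4k-instruct-gguf
+```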
+## Alias
+The following alias is also available for downloading models:
+- `cortex models download _`
+
+## Options
+```
+ -m, --model Model Id to start chat with
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/models/get.mdx b/docs/src/pages/cortex/cli/models/get.mdx
new file mode 100644
index 0000000000..3f7ce2be36
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/get.mdx
@@ -0,0 +1,45 @@
+---
+title: Cortex Models Get
+description: Cortex models subcommands.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models get`
+
+This command returns the details of a model defined by a `MODEL_ID`.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models get MODEL_ID
+```
+
+## Options
+```
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/models/list.mdx b/docs/src/pages/cortex/cli/models/list.mdx
new file mode 100644
index 0000000000..5ab1389f54
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/list.mdx
@@ -0,0 +1,46 @@
+---
+title: Cortex Models List
+description: Cortex models subcommands.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models list`
+
+This command lists all local models.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models list
+```
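+
+For example, to print the list as a table instead of the default JSON output (using the `--format` option described below):
+
+```bash
+cortex models list --format table
+```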
+
+## Options
+```
+ -f, --format Print models list in table or json format (default: "json")
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/models/remove.mdx b/docs/src/pages/cortex/cli/models/remove.mdx
new file mode 100644
index 0000000000..c972eacae4
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/remove.mdx
@@ -0,0 +1,45 @@
+---
+title: Cortex Models Remove
+description: Cortex models subcommands.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models remove`
+
+This command deletes a local model defined by a `MODEL_ID`.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models remove MODEL_ID
+```
+
+## Options
+```
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/models/start.mdx b/docs/src/pages/cortex/cli/models/start.mdx
new file mode 100644
index 0000000000..355f20ba3c
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/start.mdx
@@ -0,0 +1,46 @@
+---
+title: Cortex Models Start
+description: Cortex models subcommands.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models start`
+
+This command starts a model defined by a `MODEL_ID`.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models start MODEL_ID
+```
+
+## Options
+```
+ -a, --attach Attach to interactive chat session (default: false)
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/models/stop.mdx b/docs/src/pages/cortex/cli/models/stop.mdx
new file mode 100644
index 0000000000..a7a95fa001
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/stop.mdx
@@ -0,0 +1,45 @@
+---
+title: Cortex Models Stop
+description: Cortex models subcommands.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models stop`
+
+This command stops a model defined by a `MODEL_ID`.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models stop MODEL_ID
+```
+
+## Options
+```
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/models/update.mdx b/docs/src/pages/cortex/cli/models/update.mdx
new file mode 100644
index 0000000000..129fa36340
--- /dev/null
+++ b/docs/src/pages/cortex/cli/models/update.mdx
@@ -0,0 +1,48 @@
+---
+title: Cortex Models Update
+description: Cortex models subcommands.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex models update`
+
+This command updates a model configuration defined by a `MODEL_ID`.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex models update MODEL_ID OPTIONS
+```
+
+## Options
+```
+ -m, --model Model Id to update
+ -c, --options Specify the options to update the model. Syntax: -c option1=value1 option2=value2. For
+ example: cortex models update -c max_tokens=100 temperature=0.5
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/ps.mdx b/docs/src/pages/cortex/cli/ps.mdx
new file mode 100644
index 0000000000..4655c25c33
--- /dev/null
+++ b/docs/src/pages/cortex/cli/ps.mdx
@@ -0,0 +1,48 @@
+---
+title: Cortex Ps
+description: Cortex ps command.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex ps`
+
+This command shows the running model and its status.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex ps
+```
+For example, it returns the following table:
+```bash
+┌─────────┬──────────────────────┬───────────────────┬───────────┬──────────┬─────┬──────┐
+│ (index) │ modelId │ engine │ status │ duration │ ram │ vram │
+├─────────┼──────────────────────┼───────────────────┼───────────┼──────────┼─────┼──────┤
+│ 0 │ 'janhq/tinyllama/1b' │ 'cortex.llamacpp' │ 'running' │ '7s' │ '-' │ '-' │
+└─────────┴──────────────────────┴───────────────────┴───────────┴──────────┴─────┴──────┘
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cli/pull.mdx b/docs/src/pages/cortex/cli/pull.mdx
new file mode 100644
index 0000000000..0c71bb6668
--- /dev/null
+++ b/docs/src/pages/cortex/cli/pull.mdx
@@ -0,0 +1,82 @@
+---
+title: Cortex Pull
+description: Cortex CLI.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex pull`
+
+This command facilitates downloading machine learning models from various model hubs, including the popular 🤗 [Hugging Face](https://huggingface.co/).
+
+By default, models are downloaded to the `node_modules` library path. For additional information on storage paths and options, refer [here](/cortex/cli#storage).
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Alias
+The following alias is also available for downloading models:
+- `cortex download _`
+
+## Usage
+
+### Preconfigured Models
+
+Preconfigured models (with optimal runtime parameters and templates) are available from the [Jan Model Hub](https://huggingface.co/janhq) on Hugging Face.
+
+Models can be downloaded using a Docker-like interface with the following syntax: `repo_name:branch_name`. Each variant may include different quantizations and sizes, typically organized in the repository’s branches.
+
+Available models include [llama3](https://huggingface.co/janhq/llama3), [mistral](https://huggingface.co/janhq/mistral), [tinyllama](https://huggingface.co/janhq/tinyllama), and [many more](https://huggingface.co/janhq).
+
+
+New models will soon be added to HuggingFace's janhq repository.
+
+
+```bash
+# Pull a specific variant with `repo_name:branch`
+cortex pull llama3:7b
+```
+You can also download `size`, `format`, and `quantization` variants of each model.
+
+```bash
+cortex pull llama3:8b-instruct-v3-gguf-Q4_K_M
+cortex pull llama3:8b-instruct-v3-tensorrt-llm
+```
+
+Model variants are provided via the `branches` in each model's Hugging Face repo.
+
+### Hugging Face Models
+
+You can download any GGUF, TensorRT, or supported-format model directly from Hugging Face.
+
+```bash
+# cortex pull org_name/repo_name
+cortex pull microsoft/Phi-3-mini-4k-instruct-gguf
+```
+
+## Options
+
+```
+ -h, --help display help for command
+```
diff --git a/docs/src/pages/cortex/cli/run.mdx b/docs/src/pages/cortex/cli/run.mdx
new file mode 100644
index 0000000000..59067322a1
--- /dev/null
+++ b/docs/src/pages/cortex/cli/run.mdx
@@ -0,0 +1,53 @@
+---
+title: Cortex Run
+description: Cortex run command
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex run`
+
+This command starts an interactive chat shell with a specified machine learning model.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex run MODEL_ID
+```
+### Options
+```
+ -t, --thread Thread Id. If not provided, will create new thread
+ -h, --help display help for command
+```
+
+## Command Chain
+
+The `cortex run` command is a convenience wrapper that automatically executes a sequence of commands to simplify user interactions (see the sketch after the list):
+
+1. [`cortex start`](/cortex/cli/models/start): This command starts the specified model, making it active and ready for interactions.
+2. [`cortex chat`](/cortex/cli/chat): Following model activation, this command opens an interactive chat shell where users can directly communicate with the model.
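+
+In other words, the shortcut expands to roughly the following sequence (the model id is illustrative):
+
+```bash
+# These two steps...
+cortex models start llama3
+cortex chat --model llama3
+
+# ...are what the shortcut performs for you
+cortex run llama3
+```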
+
diff --git a/docs/src/pages/cortex/cli/serve.mdx b/docs/src/pages/cortex/cli/serve.mdx
new file mode 100644
index 0000000000..bc46683e9f
--- /dev/null
+++ b/docs/src/pages/cortex/cli/serve.mdx
@@ -0,0 +1,46 @@
+---
+title: Cortex Models Serve
+description: Cortex serve command.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# `cortex serve`
+
+This command runs the API endpoint server for the Cortex back-end.
+
+
+This command is compatible with all OpenAI and OpenAI-compatible endpoints.
+
+
+## Usage
+
+```bash
+cortex serve
+```
+
+## Options
+```
+ -h, --host configure the host for the API endpoint server
+ -h, --help display help for command
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/command-line.mdx b/docs/src/pages/cortex/command-line.mdx
new file mode 100644
index 0000000000..5b635cb902
--- /dev/null
+++ b/docs/src/pages/cortex/command-line.mdx
@@ -0,0 +1,81 @@
+---
+title: Command Line Interface
+description: Cortex CLI.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# Command Line Interface
+
+The Cortex CLI provides a user-friendly platform for managing and operating large language models (LLMs), inspired by tools like Docker and GitHub CLI. Designed for straightforward installation and use, it simplifies the integration and management of LLMs.
+
+
+The Cortex CLI is OpenAI-compatible.
+
+
+## Installation
+To get started with the Cortex CLI, please see our guides:
+- [Quickstart](/cortex/quickstart)
+- [Device specific installation](/cortex/installation)
+
+These resources provide detailed instructions to ensure Cortex is set up correctly on your machine, accommodating various hardware environments.
+
+## Usage
+
+The Cortex CLI has a robust command set that streamlines your LLM interactions.
+
+Check out the [CLI reference pages](/cortex/cli) for a comprehensive guide on all available commands and their specific functions.
+
+## Storage
+
+By default, Cortex CLI stores model binaries, thread history, and other usage data in:
+`$(npm list -g @janhq/cortex)`.
+
+You can find the respective folders within the `/lib/node_modules/@janhq/cortex/dist/` subdirectory.
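+
+A minimal way to locate that folder on your machine (assuming a global npm installation of `@janhq/cortex`):
+
+```bash
+# Print where the Cortex package is installed globally
+npm list -g @janhq/cortex
+
+# Inspect the data folders inside the package's dist directory
+ls "$(npm root -g)/@janhq/cortex/dist"
+```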
+
+
+**Ongoing Development**:
+- Customizable Storage Locations
+- Database Integration
+
+
+## CLI Syntax
+
+The Cortex CLI improves the developer experience by incorporating command chaining and syntactic enhancements.
+This approach combines multiple operations into a single command, streamlining complex workflows and simplifying the execution of longer processes.
+
+### OpenAI API Equivalence
+
+The design of Cortex CLI commands strictly adheres to the method names used in the OpenAI API as a standard practice. This ensures a smooth transition for users familiar with OpenAI’s system.
+
+For example:
+- The `cortex chat` command is equivalent to the [`POST /v1/chat/completions` endpoint](/cortex/cortex-chat).
+
+- The `cortex models get ID` command is equivalent to the [`GET /v1/models/{id}` endpoint](/cortex/cortex-models).
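+
+As a rough sketch of this equivalence (assuming the local API server from `cortex serve` listens on port 1337; adjust the port and model name to your setup), the same chat request can be issued either way:
+
+```bash
+# Via the CLI
+cortex chat --model llama3 -m "Hello!"
+
+# Via the OpenAI-compatible HTTP endpoint (port 1337 is assumed here)
+curl http://localhost:1337/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "llama3",
+    "messages": [{"role": "user", "content": "Hello!"}]
+  }'
+```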
+
+### Command Chaining
+
+Cortex CLI’s command chaining support allows multiple commands to be executed in sequence with a simplified syntax. This approach reduces the complexity of command inputs and speeds up development tasks.
+
+For example:
+- The [`cortex run`](/cortex/cortex-run) command, inspired by Docker and GitHub, starts the model and the inference engine, and provides a command-line chat interface for easy testing.
diff --git a/docs/src/pages/cortex/cortex-cpp.mdx b/docs/src/pages/cortex/cortex-cpp.mdx
new file mode 100644
index 0000000000..bd39939dc6
--- /dev/null
+++ b/docs/src/pages/cortex/cortex-cpp.mdx
@@ -0,0 +1,77 @@
+---
+title: Cortex.cpp
+description: Cortex.cpp Architecture
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+# Cortex.cpp
+
+Cortex.cpp is a stateless C++ server that is 100% compatible with the OpenAI API (stateless endpoints).
+
+It includes a Drogon server with request queues, model orchestration logic, hardware telemetry, and more, for production environments.
+
+This guide walks you through how Cortex.cpp is designed, the codebase structure, and future plans.
+
+## Usage
+
+See [Quickstart](/cortex/quickstart)
+
+## Interface
+
+## Architecture
+
+## Code Structure
+
+```md
+├── app/
+│ │ ├── controllers/
+│ │ ├── models/
+│ │ ├── services/
+│ │ ├── ?engines/
+│ │ │ ├── llama.cpp
+│ │ │ ├── tensorrt-llm
+│ │ │ └── ...
+│ │ └── ...
+│ ├── CMakeLists.txt
+│ ├── config.json
+│ ├── Dockerfile
+│ ├── docker-compose.yml
+│ ├── README.md
+│ └── ...
+```
+
+The `cortex-cpp` folder contains stateless implementations, most of which call into `cortex.llamacpp` and `cortex.tensorrt-llm`, depending on the engine in use at runtime.
+
+Here you will find the implementations for stateless endpoints:
+- `/chat/completion`
+- `/audio`
+- `/fine_tuning`
+- `/embeddings`
+- `/load_model`
+- `/unload_model`
+
+It also contains core hardware and model management logic, such as CPU instruction set detection and multi-model loading.
+
+## Runtime
+
+## Roadmap
diff --git a/docs/src/pages/cortex/cortex-llamacpp.mdx b/docs/src/pages/cortex/cortex-llamacpp.mdx
new file mode 100644
index 0000000000..e4d5faa907
--- /dev/null
+++ b/docs/src/pages/cortex/cortex-llamacpp.mdx
@@ -0,0 +1,143 @@
+---
+title: Cortex.llamacpp
+description: Cortex.llamacpp Architecture
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+# Cortex.llamacpp
+
+Cortex.llamacpp is a C++ inference library that can be loaded by any server at runtime. It submodules (and occasionally upstreams) [llama.cpp](https://github.com/ggerganov/llama.cpp) for GGUF inference.
+
+In addition to llama.cpp, cortex.llamacpp adds:
+- OpenAI compatibility for the stateless endpoints
+- Model orchestration, such as model warm-up and running concurrent models
+
+
+Cortex.llamacpp was formerly called "Nitro".
+
+
+If you already use [Jan](/docs) or [Cortex](/cortex), cortex.llamacpp is bundled by default and you don’t need this guide. This guide walks you through how to use cortex.llamacpp as a standalone library in any custom C++ server.
+
+## Usage
+
+To include cortex.llamacpp in your own server implementation, follow this [server example](https://github.com/janhq/cortex.llamacpp/tree/main/examples/server).
+
+## Interface
+
+Cortex.llamacpp has the following Interfaces:
+
+- **HandleChatCompletion:** Processes chat completion tasks
+
+ ```cpp
+  void HandleChatCompletion(
+      std::shared_ptr<Json::Value> jsonBody,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback);
+ ```
+
+- **HandleEmbedding:** Generates embeddings for the input data provided
+
+ ```cpp
+  void HandleEmbedding(
+      std::shared_ptr<Json::Value> jsonBody,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback);
+ ```
+
+- **LoadModel:** Loads a model based on the specifications
+
+ ```cpp
+  void LoadModel(
+      std::shared_ptr<Json::Value> jsonBody,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback);
+ ```
+
+- **UnloadModel:** Unloads a model as specified
+
+ ```cpp
+  void UnloadModel(
+      std::shared_ptr<Json::Value> jsonBody,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback);
+ ```
+
+- **GetModelStatus:** Retrieves the status of a model
+
+ ```cpp
+  void GetModelStatus(
+      std::shared_ptr<Json::Value> jsonBody,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback);
+ ```
+
+**Parameters:**
+
+- **`jsonBody`**: The request content in JSON format.
+- **`callback`**: A function that handles the response
+
+## Architecture
+
+The main components include:
+- `enginei`: an engine interface definition that extends to all engines, handling endpoint logic and facilitating communication between `cortex.cpp` and `llama engine`.
+- `llama engine`: exposes APIs for embedding and inference. It loads and unloads models and simplifies API calls to `llama.cpp`.
+- `llama.cpp`: submodule from the `llama.cpp` repository that provides the core functionality for embeddings and inferences.
+- `llama server context`: a wrapper that offers a simpler and more user-friendly interface for the `llama.cpp` APIs
+
+![Cortex llamacpp architecture](./_assets/cortex-llamacpp-arch.png)
+
+### Communication Protocols:
+
+- `Streaming`: Responses are processed and returned one token at a time.
+- `RESTful`: The response is processed as a whole. After the llama server context completes the entire process, it returns a single result back to cortex.cpp.
+
+![Cortex llamacpp architecture](./_assets/cortex-llamacpp-act.png)
+
+## Code Structure
+
+```
+.
+├── base # Engine interface definition
+| └── cortex-common # Common interfaces used for all engines
+| └── enginei.h # Define abstract classes and interface methods for engines
+├── examples # Server example to integrate engine
+│ └── server.cc # Example server demonstrating engine integration
+├── llama.cpp # Upstream llama.cpp repository
+│ └── (files from upstream llama.cpp)
+├── src # Source implementation for llama.cpp
+│ ├── chat_completion_request.h # OpenAI compatible request handling
+│ ├── llama_client_slot # Manage vector of slots for parallel processing
+│ ├── llama_engine # Implementation of llamacpp engine for model loading and inference
+│ ├── llama_server_context # Context management for chat completion requests
+│ │ ├── slot # Struct for slot management
+│ │ └── llama_context # Struct for llama context management
+| | └── chat_completion # Struct for chat completion management
+| | └── embedding # Struct for embedding management
+├── third-party # Dependencies of the cortex.llamacpp project
+│ └── (list of third-party dependencies)
+```
+
+## Runtime
+
+## Roadmap
+The future plans for Cortex.llamacpp are focused on enhancing performance and expanding capabilities. Key areas of improvement include:
+
+- Performance Enhancements: Optimizing speed and reducing memory usage to ensure efficient processing of tasks.
+- Multimodal Model Compatibility: Expanding support to include a variety of multimodal models, enabling a broader range of applications and use cases.
+
+To follow the latest developments, see the [cortex.llamacpp GitHub](https://github.com/janhq/cortex.llamacpp)
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cortex-openvino.mdx b/docs/src/pages/cortex/cortex-openvino.mdx
new file mode 100644
index 0000000000..391902cf2b
--- /dev/null
+++ b/docs/src/pages/cortex/cortex-openvino.mdx
@@ -0,0 +1,24 @@
+---
+title: Cortex.OpenVino
+description: Cortex.OpenVino Architecture
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+# Cortex.OpenVino
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cortex-python.mdx b/docs/src/pages/cortex/cortex-python.mdx
new file mode 100644
index 0000000000..2aef1e7cec
--- /dev/null
+++ b/docs/src/pages/cortex/cortex-python.mdx
@@ -0,0 +1,24 @@
+---
+title: Cortex.python
+description: Cortex.python Architecture
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+# Cortex.python
\ No newline at end of file
diff --git a/docs/src/pages/cortex/cortex-tensorrt-llm.mdx b/docs/src/pages/cortex/cortex-tensorrt-llm.mdx
new file mode 100644
index 0000000000..e582b1cc8e
--- /dev/null
+++ b/docs/src/pages/cortex/cortex-tensorrt-llm.mdx
@@ -0,0 +1,24 @@
+---
+title: Cortex.tensorrt-llm
+description: Cortex.tensorrt-llm Architecture
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+# Cortex.tensorrt-llm
\ No newline at end of file
diff --git a/docs/src/pages/cortex/embeddings.mdx b/docs/src/pages/cortex/embeddings.mdx
new file mode 100644
index 0000000000..a841867108
--- /dev/null
+++ b/docs/src/pages/cortex/embeddings.mdx
@@ -0,0 +1,22 @@
+---
+title: Embeddings
+description: Embeddings
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/embeddings/overview.mdx b/docs/src/pages/cortex/embeddings/overview.mdx
new file mode 100644
index 0000000000..e679cd7634
--- /dev/null
+++ b/docs/src/pages/cortex/embeddings/overview.mdx
@@ -0,0 +1,22 @@
+---
+title: Overview
+description: Overview.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/error-codes.mdx b/docs/src/pages/cortex/error-codes.mdx
new file mode 100644
index 0000000000..878c4d66a2
--- /dev/null
+++ b/docs/src/pages/cortex/error-codes.mdx
@@ -0,0 +1,22 @@
+---
+title: Error Codes
+description: Error Codes.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/ext-architecture.mdx b/docs/src/pages/cortex/ext-architecture.mdx
new file mode 100644
index 0000000000..c2230c419e
--- /dev/null
+++ b/docs/src/pages/cortex/ext-architecture.mdx
@@ -0,0 +1,22 @@
+---
+title: Extensions Architecture
+description: Extensions Architecture
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/fine-tuning.mdx b/docs/src/pages/cortex/fine-tuning.mdx
new file mode 100644
index 0000000000..7bf80bc11a
--- /dev/null
+++ b/docs/src/pages/cortex/fine-tuning.mdx
@@ -0,0 +1,22 @@
+---
+title: Fine Tuning
+description: Fine Tuning
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/fine-tuning/overview.mdx b/docs/src/pages/cortex/fine-tuning/overview.mdx
new file mode 100644
index 0000000000..e679cd7634
--- /dev/null
+++ b/docs/src/pages/cortex/fine-tuning/overview.mdx
@@ -0,0 +1,22 @@
+---
+title: Overview
+description: Overview.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/function-calling.mdx b/docs/src/pages/cortex/function-calling.mdx
new file mode 100644
index 0000000000..eca57c9829
--- /dev/null
+++ b/docs/src/pages/cortex/function-calling.mdx
@@ -0,0 +1,22 @@
+---
+title: Function Calling
+description: Function Calling
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/hardware.mdx b/docs/src/pages/cortex/hardware.mdx
new file mode 100644
index 0000000000..fdb16a4e86
--- /dev/null
+++ b/docs/src/pages/cortex/hardware.mdx
@@ -0,0 +1,50 @@
+---
+title: Hardware Requirements
+description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting.
+sidebar_position: 2
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout, Steps } from 'nextra/components'
+
+# Hardware Requirements
+
+To run LLMs on device, Cortex has the following general hardware requirements. Please refer to the respective [installation](/cortex/installation) sections for detailed specifications tailored to each environment.
+
+
+
+## OS
+- MacOSX 13.6 or higher.
+- Windows 10 or higher.
+- Ubuntu 12.04 and later.
+
+## RAM (CPU Mode)
+- 8GB for running up to 3B models.
+- 16GB for running up to 7B models.
+- 32GB for running up to 13B models.
+
+## VRAM (GPU Mode)
+- 6GB can load the 3B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+- 8GB can load the 7B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+- 12GB can load the 13B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+
+## Disk Space
+- 10GB: The app is 1.02 MB, but models are usually 4GB+
\ No newline at end of file
diff --git a/docs/src/pages/cortex/index.mdx b/docs/src/pages/cortex/index.mdx
new file mode 100644
index 0000000000..887eee2461
--- /dev/null
+++ b/docs/src/pages/cortex/index.mdx
@@ -0,0 +1,50 @@
+---
+title: Cortex
+description: Cortex is a local LLM engine for developers
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Discord integration,
+ Discord,
+ bot,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Cortex
+
+🚧 Cortex is under construction.
+
+
+![Cortex Cover Image](./_assets/cortex-cover.png)
+
+Cortex is an [OpenAI-compatible](https://platform.openai.com/docs/introduction), local AI server that developers can use to build LLM apps. It can be used as a standalone server or imported as a library.
+
+Cortex currently supports two inference engines:
+- Llama.cpp
+- TensorRT-LLM
+
+
+ **Real-world Use**: Cortex powers [Jan](/docs), our local ChatGPT-alternative.
+
+ Cortex has been battle-tested through 900k downloads and handles a variety of hardware and software edge cases.
+
+
+### Roadmap
+
+Cortex's roadmap is to implement an [OpenAI-equivalent API](https://platform.openai.com/docs/api-reference) using a fully open source stack. Our goal is to make switching to open source AI as easy as possible for developers.
+
+### Architecture
+
+Cortex's [architecture](/cortex/architecture) features a C++ inference core, with [higher-order features](/cortex/architecture) handled in TypeScript.
+
+Our [long-term direction](/cortex/roadmap) is to eventually become a full C++ library, enabling embedded and robotics use cases.
\ No newline at end of file
diff --git a/docs/src/pages/cortex/installation.mdx b/docs/src/pages/cortex/installation.mdx
new file mode 100644
index 0000000000..2c32bac3f2
--- /dev/null
+++ b/docs/src/pages/cortex/installation.mdx
@@ -0,0 +1,37 @@
+---
+title: Desktop Installation
+description: Cortex Desktop Installation.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+import childPages from './installation/_meta.json';
+
+# Cortex Desktop Installation
+
+<Cards
+  num={3}
+  children={Object.entries(childPages).map(([key, page]) => (
+    <Card key={key} title={page.title} href={page.href} />
+  ))}
+/>
\ No newline at end of file
diff --git a/docs/src/pages/cortex/installation/_meta.json b/docs/src/pages/cortex/installation/_meta.json
new file mode 100644
index 0000000000..4929f731a1
--- /dev/null
+++ b/docs/src/pages/cortex/installation/_meta.json
@@ -0,0 +1,14 @@
+{
+ "mac": {
+ "title": "Mac",
+ "href": "/cortex/installation/mac"
+ },
+ "windows": {
+ "title": "Windows",
+ "href": "/cortex/installation/windows"
+ },
+ "linux": {
+ "title": "Linux",
+ "href": "/cortex/installation/linux"
+ }
+}
diff --git a/docs/src/pages/cortex/installation/linux.mdx b/docs/src/pages/cortex/installation/linux.mdx
new file mode 100644
index 0000000000..2d49f27148
--- /dev/null
+++ b/docs/src/pages/cortex/installation/linux.mdx
@@ -0,0 +1,181 @@
+---
+title: Linux
+description: Install Cortex CLI on Linux.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ "desktop"
+ ]
+---
+
+import { Tabs, Steps } from 'nextra/components'
+import { Callout } from 'nextra/components'
+import FAQBox from '@/components/FaqBox'
+
+
+🚧 Cortex is under construction.
+
+# Linux Installation
+## Prerequisites
+
+### Dependencies
+
+Before installation, ensure that you have installed the following:
+
+- **Node.js**: Required for running the installation.
+- **NPM**: Needed to manage packages.
+
+
+The **CPU instruction sets** are not required for the initial installation of Cortex. These dependencies will be installed automatically during Cortex initialization if they are not already on your system.
+
+
+
+### Hardware
+
+Ensure that your system meets the following requirements to run Cortex:
+
+
+- Debian-based (Supports `.deb` and `AppImage`)
+ - Ubuntu-based
+ - Ubuntu Desktop LTS (official)/ Ubuntu Server LTS (only for server)
+ - Edubuntu (Mainly desktop)
+ - Kubuntu (Desktop only)
+ - Lubuntu (Both desktop and server, though mainly desktop)
+ - Ubuntu Budgie (Mainly desktop)
+ - Ubuntu Cinnamon (Desktop only)
+ - Ubuntu Kylin (Both desktop and server)
+ - Ubuntu MATE (Desktop only)
+- Pacman-based
+ - Arch Linux based
+ - Arch Linux (Mainly desktop)
+ - SteamOS (Desktop only)
+- RPM-based (Supports `.rpm` and `AppImage`)
+- Fedora-based
+ - RHEL-based (Server only)
+- openSUSE (Both desktop and server)
+
+
+ - Please check whether your Linux distribution supports desktop, server, or both environments.
+
+
+
+
+
+
+
+- Cortex supports processors that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Haswell processors (Q2 2013) and newer.
+- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors.
+
+
+
+- Cortex supports processors that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Excavator processors (Q2 2015) and newer.
+
+
+
+
+- 8GB for running up to 3B models (int4).
+- 16GB for running up to 7B models (int4).
+- 32GB for running up to 13B models (int4).
+
+
+We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance.
+
+
+
+
+- 6GB can load the 3B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+- 8GB can load the 7B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+- 12GB can load the 13B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+
+
+Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended.
+
+
+
+
+- At least 10GB for app storage and model download.
+
+
+
+## Cortex Installation
+
+To install Cortex, follow the steps below:
+
+
+### Step 1: Install Cortex
+
+Run the following command to install Cortex globally on your machine:
+
+
+Install NPM on your machine before proceeding with this step.
+
+
+
+```sh
+# Install globally on your system
+npm i -g @janhq/cortex
+```
+
+Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
+
+
+
+### Step 2: Verify the Installation
+
+1. After installation, you can verify that Cortex is installed correctly by getting help information.
+
+```sh
+# Get the help information
+cortex -h
+```
+2. Cortex is ready to use!
+
+## Build from Source
+
+To install Cortex from the source, follow the steps below:
+
+1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev).
+2. Navigate to the `cortex-js` folder.
+3. Open the terminal and run the following command to build the Cortex project:
+
+```sh
+npx nest build
+```
+
+4. Make the `command.js` executable:
+
+```sh
+chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js'
+```
+
+5. Link the package globally:
+
+```sh
+npm link
+```
+6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex).
+## Uninstall Cortex
+
+Run the following command to uninstall Cortex globally on your machine:
+```sh
+# Uninstall globally on your system
+npm uninstall -g @janhq/cortex
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/installation/mac.mdx b/docs/src/pages/cortex/installation/mac.mdx
new file mode 100644
index 0000000000..9e5bd7e72a
--- /dev/null
+++ b/docs/src/pages/cortex/installation/mac.mdx
@@ -0,0 +1,147 @@
+---
+title: Mac
+description: Install Cortex CLI on Mac.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ "desktop"
+ ]
+---
+
+import { Tabs, Steps } from 'nextra/components'
+import { Callout } from 'nextra/components'
+import FAQBox from '@/components/FaqBox'
+
+
+
+🚧 Cortex is under construction.
+
+# Mac Installation
+## Prerequisites
+
+### Dependencies
+
+Before installation, ensure that you have installed the following:
+
+- **Node.js**: Required for running the installation.
+- **NPM**: Needed to manage packages.
+
+
+The **CPU instruction sets** are not required for the initial installation of Cortex. These dependencies will be installed automatically during Cortex initialization if they are not already on your system.
+
+
+
+### Hardware
+
+Ensure that your system meets the following requirements to run Cortex:
+
+
+
+
+- MacOSX 13.6 or higher.
+
+
+- 8GB for running up to 3B models.
+- 16GB for running up to 7B models.
+- 32GB for running up to 13B models.
+
+
+- At least 10GB for app and model download.
+
+
+
+
+
+
+- MacOSX 13.6 or higher.
+
+
+- 8GB for running up to 3B models.
+- 16GB for running up to 7B models.
+- 32GB for running up to 13B models.
+
+Apple Silicon Macs leverage Metal for GPU acceleration, providing faster performance than Intel Macs, which rely solely on CPU processing.
+
+
+
+
+- At least 10GB for app and model download.
+
+
+
+
+## Cortex Installation
+
+To install Cortex, follow the steps below:
+
+### Step 1: Install Cortex
+
+Run the following command to install Cortex globally on your machine:
+
+
+Install NPM on your machine before proceeding with this step.
+
+
+
+```sh
+# Install globally on your system
+npm i -g @janhq/cortex
+```
+
+Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
+
+
+
+### Step 2: Verify the Installation
+
+1. After installation, you can verify that Cortex is installed correctly by getting help information.
+
+```sh
+# Get the help information
+cortex -h
+```
+2. Cortex is ready to use!
+
+## Build from Source
+
+To install Cortex from the source, follow the steps below:
+
+1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev).
+2. Navigate to the `cortex-js` folder.
+3. Open the terminal and run the following command to build the Cortex project:
+
+```sh
+npx nest build
+```
+
+4. Make the `command.js` executable:
+
+```sh
+chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js'
+```
+
+5. Link the package globally:
+
+```sh
+npm link
+```
+6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex).
+## Uninstall Cortex
+
+Run the following command to uninstall Cortex globally on your machine:
+```sh
+# Uninstall globally using NPM
+npm uninstall -g @janhq/cortex
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/installation/windows.mdx b/docs/src/pages/cortex/installation/windows.mdx
new file mode 100644
index 0000000000..600ae0b074
--- /dev/null
+++ b/docs/src/pages/cortex/installation/windows.mdx
@@ -0,0 +1,198 @@
+---
+title: Windows
+description: Install Cortex CLI on Windows.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ "desktop"
+ ]
+---
+
+import { Tabs, Steps } from 'nextra/components'
+import { Callout } from 'nextra/components'
+import FAQBox from '@/components/FaqBox'
+
+
+
+🚧 Cortex is under construction.
+
+# Windows Installation
+
+## Prerequisites
+
+### Dependencies
+
+Before installation, ensure that you have installed the following:
+
+- **Node.js**: Required for running the installation.
+- **NPM**: Needed to manage packages.
+- **Windows Subsystem for Linux 2 (Ubuntu)**: Required for installation under WSL2.
+
+
+The **CPU instruction sets** are not required for the initial installation of Cortex. These dependencies will be installed automatically during Cortex initialization if they are not already on your system.
+
+
+
+### Hardware
+
+Ensure that your system meets the following requirements to run Cortex:
+
+
+- Windows 10 or higher.
+
+
+
+
+
+- Cortex supports processors that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Haswell processors (Q2 2013) and newer.
+- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors.
+
+
+
+- Cortex supports processors that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Excavator processors (Q2 2015) and newer.
+
+
+
+
+- 8GB for running up to 3B models (int4).
+- 16GB for running up to 7B models (int4).
+- 32GB for running up to 13B models (int4).
+
+
+We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance.
+
+
+
+
+- 6GB can load the 3B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+- 8GB can load the 7B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+- 12GB can load the 13B model (int4) with `ngl` set to 120, running at roughly full speed on CPU/GPU.
+
+
+Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended.
+
+
+
+
+- At least 10GB for app storage and model download.
+
+
+
+## Cortex Installation
+
+To install Cortex, follow the steps below:
+
+
+### Step 1: Install Cortex
+
+Run the following command to install Cortex globally on your machine:
+
+
+Install NPM on your machine before proceeding with this step.
+
+
+
+```sh
+# Install globally on your system
+npm i -g @janhq/cortex
+```
+
+Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
+
+
+
+### Step 2: Verify the Installation
+
+1. After installation, you can verify that Cortex is installed correctly by getting help information.
+
+```sh
+# Get the help information
+cortex -h
+```
+2. Cortex is ready to use!
+
+## Windows Subsystem for Linux (WSL2)
+
+To install Cortex using the NPM package in WSL2, follow the steps below:
+
+### Step 1: Open your WSL2 Terminal
+
+Open the terminal of your installed WSL2 Linux distribution, such as Ubuntu.
+
+### Step 2: Install Cortex
+
+Run the following command to install Cortex globally on your machine:
+
+
+Install NPM on your machine before proceeding with this step.
+
+
+
+```sh
+# Install globally on your system
+npm i -g @janhq/cortex
+```
+
+Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance.
+
+
+
+### Step 3: Verify the Installation
+
+After installation, you can verify that Cortex is installed correctly by getting help information.
+
+```sh
+# Get the help information
+cortex -h
+```
+
+## Build from Source
+
+To install Cortex from the source, follow the steps below:
+
+1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev).
+2. Navigate to the `cortex-js` folder.
+3. Open the terminal and run the following command to build the Cortex project:
+
+```sh
+npx nest build
+```
+
+4. Run `command.js` with Node.js:
+
+```sh
+node "[path-to]\cortex\cortex-js\dist\src\command.js"
+```
+
+5. Link the package globally:
+
+```sh
+npm link
+```
+6. Initialize Cortex by following the steps [here](#step-3-initialize-cortex).
+
+## Uninstall Cortex
+
+Run the following command to uninstall Cortex globally on your machine:
+```sh
+# Uninstall globally on your system
+npm uninstall -g @janhq/cortex
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/model-operations.mdx b/docs/src/pages/cortex/model-operations.mdx
new file mode 100644
index 0000000000..5731fe34c0
--- /dev/null
+++ b/docs/src/pages/cortex/model-operations.mdx
@@ -0,0 +1,22 @@
+---
+title: Model Operations
+description: Model Operations
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/model-operations/overview.mdx b/docs/src/pages/cortex/model-operations/overview.mdx
new file mode 100644
index 0000000000..e679cd7634
--- /dev/null
+++ b/docs/src/pages/cortex/model-operations/overview.mdx
@@ -0,0 +1,22 @@
+---
+title: Overview
+description: Overview.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/py-library.mdx b/docs/src/pages/cortex/py-library.mdx
new file mode 100644
index 0000000000..337bd5ad5d
--- /dev/null
+++ b/docs/src/pages/cortex/py-library.mdx
@@ -0,0 +1,69 @@
+---
+title: Python Library
+description: Cortex Python Library.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# Python Library
+Cortex also provides a Python client library that is a **direct substitute for OpenAI's** [Python library](https://github.com/openai/openai-python), enabling easy integration and streamlined workflows.
+
+## Installation
+Use the following pip command to install the Cortex library in your project:
+```sh
+pip install @janhq/cortex-python
+```
+## Usage
+
+Switching to the Cortex Client Library from the OpenAI Python Library involves simple updates.
+1. Replace the OpenAI import with Cortex in your application:
+```diff
+- from openai import OpenAI
++ from @janhq/cortex-python import Cortex
+```
+2. Modify the initialization of the client to use Cortex:
+```diff
+- client = OpenAI(api_key='your-api-key')
++ client = Cortex(base_url="BASE_URL", api_key="API_KEY") # This can be omitted if using the default
+
+```
+### Example Usage
+```py
+from @janhq/cortex-python import Cortex
+
+client = Cortex(base_url="http://localhost:1337", api_key="cortex")
+
+model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+client.models.start(model=model)
+
+completion = client.chat.completions.create(
+ model=model,
+ messages=[
+ {
+ "role": "user",
+ "content": "Say this is a test",
+ },
+ ],
+)
+print(completion.choices[0].message.content)
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/quickstart.mdx b/docs/src/pages/cortex/quickstart.mdx
new file mode 100644
index 0000000000..4b520a20c9
--- /dev/null
+++ b/docs/src/pages/cortex/quickstart.mdx
@@ -0,0 +1,55 @@
+---
+title: Quickstart
+description: Cortex Quickstart.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+# Quickstart
+
+
+🚧 Cortex is under construction.
+
+
+To get started, confirm that your system meets the [hardware requirements](/cortex/hardware), and follow the steps below:
+
+```bash
+# 1. Install Cortex using NPM
+npm i -g @janhq/cortex
+
+# 2. Download a GGUF model
+cortex models pull llama3
+
+# 3. Run the model to start chatting
+cortex models run llama3
+
+# 4. (Optional) Run Cortex in OpenAI-compatible server mode
+cortex serve
+```
+
+For more details regarding the Cortex server mode, please see here:
+- [Server Endpoint](/cortex/server)
+- [`cortex serve` command](/cortex/cli/serve)
+
+
+## What's Next?
+With Cortex now fully operational, you're ready to delve deeper:
+- Explore how to [install Cortex](/cortex/installation) across various hardware environments.
+- Familiarize yourself with the comprehensive set of [Cortex CLI commands](/cortex/cli) available for use.
+- Gain insights into the system’s design by examining the [architecture](/cortex/architecture) of Cortex.
\ No newline at end of file
diff --git a/docs/src/pages/cortex/rag.mdx b/docs/src/pages/cortex/rag.mdx
new file mode 100644
index 0000000000..89ab5aecde
--- /dev/null
+++ b/docs/src/pages/cortex/rag.mdx
@@ -0,0 +1,22 @@
+---
+title: RAG
+description: RAG
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/rag/overview.mdx b/docs/src/pages/cortex/rag/overview.mdx
new file mode 100644
index 0000000000..e679cd7634
--- /dev/null
+++ b/docs/src/pages/cortex/rag/overview.mdx
@@ -0,0 +1,22 @@
+---
+title: Overview
+description: Overview.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/server.mdx b/docs/src/pages/cortex/server.mdx
new file mode 100644
index 0000000000..b126612345
--- /dev/null
+++ b/docs/src/pages/cortex/server.mdx
@@ -0,0 +1,47 @@
+---
+title: Server Endpoint
+description: Cortex Server Endpoint.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps, Cards, Card } from 'nextra/components'
+import OAICoverage from "@/components/OAICoverage"
+
+
+🚧 Cortex is under construction.
+
+
+# Server Endpoint
+
+Cortex can run in headless server mode, providing an [OpenAI-API compatible](https://platform.openai.com/docs/api-reference/introduction) endpoint.
+
+## Usage
+
+```
+cortex serve
+```
+
+A full, local AI server will be started on port `1337` (customizable).
+
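+As a quick sanity check, you can hit the server from any OpenAI-compatible client or plain `curl`. The request below is a sketch that assumes the standard OpenAI-style path layout; consult the generated playground for the authoritative routes.
+
+```bash
+# Sketch: list the models exposed by the local server
+# (assumes the OpenAI-style /v1/models route; verify against the playground)
+curl http://localhost:1337/v1/models
+```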
+## Playground
+
+You can open up an interactive playground at: http://localhost:1337/api, generated from Swagger.
+
+
+## OpenAI Coverage
+
+
\ No newline at end of file
diff --git a/docs/src/pages/cortex/text-generation.mdx b/docs/src/pages/cortex/text-generation.mdx
new file mode 100644
index 0000000000..9e903bc875
--- /dev/null
+++ b/docs/src/pages/cortex/text-generation.mdx
@@ -0,0 +1,86 @@
+---
+title: Text Generation
+description: Text Generation.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+import { Tabs } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+# Text Generation
+
+Cortex's Chat API is compatible with OpenAI's [Chat Completions](https://platform.openai.com/docs/api-reference/chat) endpoint, making it a drop-in replacement for local inference.
+
+For local inference, Cortex is [multi-engine](#multiple-local-engines) and supports the following model formats:
+
+- `GGUF`: A generalizable LLM format that runs across CPUs and GPUs. Cortex implements a GGUF runtime through [llama.cpp](https://github.com/ggerganov/llama.cpp/).
+- `TensorRT`: A production-ready, enterprise-grade LLM format optimized for fast inference on NVIDIA GPUs. Cortex implements a TensorRT runtime through [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM).
+
+For remote inference, Cortex routes requests to multiple APIs while providing a single, easy-to-use, OpenAI-compatible endpoint. [Read more](#multiple-remote-apis).
+
+## Usage
+
+
+
+
+```bash
+# Streaming
+cortex chat --model janhq/TinyLlama-1.1B-Chat-v1.0-GGUF
+```
+
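+Over HTTP, the same chat request can be sent to the server's OpenAI-compatible endpoint. The sketch below assumes the OpenAI-style `/v1/chat/completions` route and that the model has already been started; check the server playground for the exact routes.
+
+```bash
+# Sketch: OpenAI-style chat completion against a local Cortex server
+# (assumes the /v1/chat/completions route and a previously started model)
+curl http://localhost:1337/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "janhq/TinyLlama-1.1B-Chat-v1.0-GGUF",
+    "messages": [{"role": "user", "content": "Say this is a test"}]
+  }'
+```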
+
+
+**Read more:**
+
+- Chat Completion Object
+- Chat Completions API
+- Chat Completions CLI
+
+## Capabilities
+
+### Multiple Local Engines
+
+Cortex scales applications from prototype to production. It runs on CPU-only laptops with Llama.cpp and GPU-accelerated clusters with TensorRT-LLM.
+
+To learn more about how to configure each engine:
+
+- Use llama.cpp
+- Use tensorrt-llm
+
+To learn more about our engine architecture:
+
+- cortex.cpp
+- cortex.llamacpp
+- cortex.tensorRTLLM
+
+### Multiple Remote APIs
+
+Cortex also works as an aggregator to make remote inference requests from a single endpoint.
+
+Currently, Cortex supports:
+- OpenAI
+- Groq
+- Cohere
+- Anthropic
+- MistralAI
+- Martian
+- OpenRouter
+
diff --git a/docs/src/pages/cortex/ts-library.mdx b/docs/src/pages/cortex/ts-library.mdx
new file mode 100644
index 0000000000..1344050b59
--- /dev/null
+++ b/docs/src/pages/cortex/ts-library.mdx
@@ -0,0 +1,66 @@
+---
+title: Typescript Library
+description: Cortex Node Client Library
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+🚧 Cortex is under construction.
+
+
+# Typescript Library
+Cortex provides a robust Typescript client library designed as a **direct substitute for OpenAI's** [Node.js/Typescript library](https://github.com/openai/openai-node), enabling easy integration and streamlined workflows.
+
+## Installation
+Install the package via npm with the following command in your project:
+```sh
+npm install @janhq/cortex-node
+```
+
+## Usage
+
+Transitioning to the Cortex Client Library from the OpenAI Client Library involves minimal changes, mostly updating the import statement.
+1. Replace the OpenAI import with Cortex in your application:
+```diff
+- import OpenAI from 'openai';
++ import { Cortex } from '@janhq/cortex-node';
+```
+2. Modify the initialization of the client to use Cortex:
+```diff
+- const openai = new OpenAI({
++ const cortex = new Cortex({
+ baseURL: 'BASE_URL', // The default base URL for Cortex is 'http://localhost:1337'
+ apiKey: process.env['OPENAI_API_KEY'], // This can be omitted if using the default
+});
+
+```
+### Example Usage
+```js
+import { Cortex } from '@janhq/cortex-node';
+
+const cortex = new Cortex({
+ baseURL: 'http://localhost:1337',
+ apiKey: process.env['cortex'],
+});
+
+cortex.models.start('llama3:7b')
+cortex.models.stop('llama3:7b')
+cortex.threads.list()
+```
\ No newline at end of file
diff --git a/docs/src/pages/cortex/vision.mdx b/docs/src/pages/cortex/vision.mdx
new file mode 100644
index 0000000000..8701ffbd18
--- /dev/null
+++ b/docs/src/pages/cortex/vision.mdx
@@ -0,0 +1,22 @@
+---
+title: Vision
+description: Vision
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/cortex/vision/overview.mdx b/docs/src/pages/cortex/vision/overview.mdx
new file mode 100644
index 0000000000..e679cd7634
--- /dev/null
+++ b/docs/src/pages/cortex/vision/overview.mdx
@@ -0,0 +1,22 @@
+---
+title: Overview
+description: Overview.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Cortex,
+ Jan,
+ LLMs
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
\ No newline at end of file
diff --git a/docs/src/pages/docs/_assets/Anthropic-1.gif b/docs/src/pages/docs/_assets/Anthropic-1.gif
new file mode 100644
index 0000000000..594c53172a
Binary files /dev/null and b/docs/src/pages/docs/_assets/Anthropic-1.gif differ
diff --git a/docs/src/pages/docs/_assets/Anthropic-2.gif b/docs/src/pages/docs/_assets/Anthropic-2.gif
new file mode 100644
index 0000000000..63e76cc3bc
Binary files /dev/null and b/docs/src/pages/docs/_assets/Anthropic-2.gif differ
diff --git a/docs/src/pages/docs/_assets/Cohere-1.gif b/docs/src/pages/docs/_assets/Cohere-1.gif
new file mode 100644
index 0000000000..6441bd8a93
Binary files /dev/null and b/docs/src/pages/docs/_assets/Cohere-1.gif differ
diff --git a/docs/src/pages/docs/_assets/Cohere-2.gif b/docs/src/pages/docs/_assets/Cohere-2.gif
new file mode 100644
index 0000000000..9488040c01
Binary files /dev/null and b/docs/src/pages/docs/_assets/Cohere-2.gif differ
diff --git a/docs/src/pages/docs/_assets/Groq-1.gif b/docs/src/pages/docs/_assets/Groq-1.gif
new file mode 100644
index 0000000000..d95e577b14
Binary files /dev/null and b/docs/src/pages/docs/_assets/Groq-1.gif differ
diff --git a/docs/src/pages/docs/_assets/Groq-2.gif b/docs/src/pages/docs/_assets/Groq-2.gif
new file mode 100644
index 0000000000..46231696f0
Binary files /dev/null and b/docs/src/pages/docs/_assets/Groq-2.gif differ
diff --git a/docs/src/pages/docs/_assets/LM-Studio-v1.gif b/docs/src/pages/docs/_assets/LM-Studio-v1.gif
new file mode 100644
index 0000000000..65dfb5db1a
Binary files /dev/null and b/docs/src/pages/docs/_assets/LM-Studio-v1.gif differ
diff --git a/docs/src/pages/docs/_assets/LM-Studio-v2.gif b/docs/src/pages/docs/_assets/LM-Studio-v2.gif
new file mode 100644
index 0000000000..ad012df5d1
Binary files /dev/null and b/docs/src/pages/docs/_assets/LM-Studio-v2.gif differ
diff --git a/docs/src/pages/docs/_assets/LM-Studio-v3.gif b/docs/src/pages/docs/_assets/LM-Studio-v3.gif
new file mode 100644
index 0000000000..d5208c2bc5
Binary files /dev/null and b/docs/src/pages/docs/_assets/LM-Studio-v3.gif differ
diff --git a/docs/src/pages/docs/_assets/Martian-1.gif b/docs/src/pages/docs/_assets/Martian-1.gif
new file mode 100644
index 0000000000..3da994cc4a
Binary files /dev/null and b/docs/src/pages/docs/_assets/Martian-1.gif differ
diff --git a/docs/src/pages/docs/_assets/Martian-2.gif b/docs/src/pages/docs/_assets/Martian-2.gif
new file mode 100644
index 0000000000..27fa761b77
Binary files /dev/null and b/docs/src/pages/docs/_assets/Martian-2.gif differ
diff --git a/docs/src/pages/docs/_assets/Mistral-1.gif b/docs/src/pages/docs/_assets/Mistral-1.gif
new file mode 100644
index 0000000000..7d1e821084
Binary files /dev/null and b/docs/src/pages/docs/_assets/Mistral-1.gif differ
diff --git a/docs/src/pages/docs/_assets/Mistral-2.gif b/docs/src/pages/docs/_assets/Mistral-2.gif
new file mode 100644
index 0000000000..25c8d2fca4
Binary files /dev/null and b/docs/src/pages/docs/_assets/Mistral-2.gif differ
diff --git a/docs/src/pages/docs/_assets/Ollama-1.gif b/docs/src/pages/docs/_assets/Ollama-1.gif
new file mode 100644
index 0000000000..d9c7fa135d
Binary files /dev/null and b/docs/src/pages/docs/_assets/Ollama-1.gif differ
diff --git a/docs/src/pages/docs/_assets/Ollama-2.gif b/docs/src/pages/docs/_assets/Ollama-2.gif
new file mode 100644
index 0000000000..eadc6afdd6
Binary files /dev/null and b/docs/src/pages/docs/_assets/Ollama-2.gif differ
diff --git a/docs/src/pages/docs/_assets/Ollama-3.gif b/docs/src/pages/docs/_assets/Ollama-3.gif
new file mode 100644
index 0000000000..6b09000b95
Binary files /dev/null and b/docs/src/pages/docs/_assets/Ollama-3.gif differ
diff --git a/docs/src/pages/docs/_assets/OpenAi-1.gif b/docs/src/pages/docs/_assets/OpenAi-1.gif
new file mode 100644
index 0000000000..c98e2f2be2
Binary files /dev/null and b/docs/src/pages/docs/_assets/OpenAi-1.gif differ
diff --git a/docs/src/pages/docs/_assets/OpenAi-2.gif b/docs/src/pages/docs/_assets/OpenAi-2.gif
new file mode 100644
index 0000000000..8cd449f7fe
Binary files /dev/null and b/docs/src/pages/docs/_assets/OpenAi-2.gif differ
diff --git a/docs/src/pages/docs/_assets/OpenRouter-1.gif b/docs/src/pages/docs/_assets/OpenRouter-1.gif
new file mode 100644
index 0000000000..cd6a7898fa
Binary files /dev/null and b/docs/src/pages/docs/_assets/OpenRouter-1.gif differ
diff --git a/docs/src/pages/docs/_assets/OpenRouter-2.gif b/docs/src/pages/docs/_assets/OpenRouter-2.gif
new file mode 100644
index 0000000000..307b07debc
Binary files /dev/null and b/docs/src/pages/docs/_assets/OpenRouter-2.gif differ
diff --git a/docs/src/pages/docs/_assets/advance-set.png b/docs/src/pages/docs/_assets/advance-set.png
new file mode 100644
index 0000000000..41917ef88c
Binary files /dev/null and b/docs/src/pages/docs/_assets/advance-set.png differ
diff --git a/docs/src/pages/docs/_assets/advance-settings2.png b/docs/src/pages/docs/_assets/advance-settings2.png
new file mode 100644
index 0000000000..151063dca6
Binary files /dev/null and b/docs/src/pages/docs/_assets/advance-settings2.png differ
diff --git a/docs/src/pages/docs/_assets/amd.gif b/docs/src/pages/docs/_assets/amd.gif
new file mode 100644
index 0000000000..cc053b80cb
Binary files /dev/null and b/docs/src/pages/docs/_assets/amd.gif differ
diff --git a/docs/src/pages/docs/_assets/appearance.png b/docs/src/pages/docs/_assets/appearance.png
new file mode 100644
index 0000000000..46ba12e717
Binary files /dev/null and b/docs/src/pages/docs/_assets/appearance.png differ
diff --git a/docs/src/pages/docs/_assets/assistant-slider.png b/docs/src/pages/docs/_assets/assistant-slider.png
new file mode 100644
index 0000000000..8289e0d51b
Binary files /dev/null and b/docs/src/pages/docs/_assets/assistant-slider.png differ
diff --git a/docs/src/pages/docs/_assets/assistant1.gif b/docs/src/pages/docs/_assets/assistant1.gif
new file mode 100644
index 0000000000..75a56ba276
Binary files /dev/null and b/docs/src/pages/docs/_assets/assistant1.gif differ
diff --git a/docs/src/pages/docs/_assets/assistant2.gif b/docs/src/pages/docs/_assets/assistant2.gif
new file mode 100644
index 0000000000..44bfb2264e
Binary files /dev/null and b/docs/src/pages/docs/_assets/assistant2.gif differ
diff --git a/docs/src/pages/docs/_assets/asst.gif b/docs/src/pages/docs/_assets/asst.gif
new file mode 100644
index 0000000000..21797d7741
Binary files /dev/null and b/docs/src/pages/docs/_assets/asst.gif differ
diff --git a/docs/src/pages/docs/_assets/browser1.png b/docs/src/pages/docs/_assets/browser1.png
new file mode 100644
index 0000000000..61b1d4778b
Binary files /dev/null and b/docs/src/pages/docs/_assets/browser1.png differ
diff --git a/docs/src/pages/docs/_assets/browser2.png b/docs/src/pages/docs/_assets/browser2.png
new file mode 100644
index 0000000000..fbf3bcdb91
Binary files /dev/null and b/docs/src/pages/docs/_assets/browser2.png differ
diff --git a/docs/src/pages/docs/_assets/chat.gif b/docs/src/pages/docs/_assets/chat.gif
new file mode 100644
index 0000000000..94b8c27193
Binary files /dev/null and b/docs/src/pages/docs/_assets/chat.gif differ
diff --git a/docs/src/pages/docs/_assets/clean.png b/docs/src/pages/docs/_assets/clean.png
new file mode 100644
index 0000000000..a4f58c375e
Binary files /dev/null and b/docs/src/pages/docs/_assets/clean.png differ
diff --git a/docs/src/pages/docs/_assets/clear-logs.png b/docs/src/pages/docs/_assets/clear-logs.png
new file mode 100644
index 0000000000..348fc1d8f2
Binary files /dev/null and b/docs/src/pages/docs/_assets/clear-logs.png differ
diff --git a/docs/src/pages/docs/_assets/clear.gif b/docs/src/pages/docs/_assets/clear.gif
new file mode 100644
index 0000000000..445708d223
Binary files /dev/null and b/docs/src/pages/docs/_assets/clear.gif differ
diff --git a/docs/src/pages/docs/_assets/configure.gif b/docs/src/pages/docs/_assets/configure.gif
new file mode 100644
index 0000000000..96daaaa616
Binary files /dev/null and b/docs/src/pages/docs/_assets/configure.gif differ
diff --git a/docs/src/pages/docs/_assets/data-folder.png b/docs/src/pages/docs/_assets/data-folder.png
new file mode 100644
index 0000000000..def0f38c7b
Binary files /dev/null and b/docs/src/pages/docs/_assets/data-folder.png differ
diff --git a/docs/src/pages/docs/_assets/deeplink.gif b/docs/src/pages/docs/_assets/deeplink.gif
new file mode 100644
index 0000000000..615a66e560
Binary files /dev/null and b/docs/src/pages/docs/_assets/deeplink.gif differ
diff --git a/docs/src/pages/docs/_assets/default.gif b/docs/src/pages/docs/_assets/default.gif
new file mode 100644
index 0000000000..3bc6a68f67
Binary files /dev/null and b/docs/src/pages/docs/_assets/default.gif differ
diff --git a/docs/src/pages/docs/_assets/delete-data.png b/docs/src/pages/docs/_assets/delete-data.png
new file mode 100644
index 0000000000..178a00852f
Binary files /dev/null and b/docs/src/pages/docs/_assets/delete-data.png differ
diff --git a/docs/src/pages/docs/_assets/delete-tensor.gif b/docs/src/pages/docs/_assets/delete-tensor.gif
new file mode 100644
index 0000000000..86f45fc65a
Binary files /dev/null and b/docs/src/pages/docs/_assets/delete-tensor.gif differ
diff --git a/docs/src/pages/docs/_assets/delete-threads.png b/docs/src/pages/docs/_assets/delete-threads.png
new file mode 100644
index 0000000000..b134e8e124
Binary files /dev/null and b/docs/src/pages/docs/_assets/delete-threads.png differ
diff --git a/docs/src/pages/docs/_assets/delete.png b/docs/src/pages/docs/_assets/delete.png
new file mode 100644
index 0000000000..6b3f669c2b
Binary files /dev/null and b/docs/src/pages/docs/_assets/delete.png differ
diff --git a/docs/src/pages/docs/_assets/disable-tensor.gif b/docs/src/pages/docs/_assets/disable-tensor.gif
new file mode 100644
index 0000000000..91e9fe3d62
Binary files /dev/null and b/docs/src/pages/docs/_assets/disable-tensor.gif differ
diff --git a/docs/src/pages/docs/_assets/download-button.png b/docs/src/pages/docs/_assets/download-button.png
new file mode 100644
index 0000000000..2e8088d159
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-button.png differ
diff --git a/docs/src/pages/docs/_assets/download-button2.png b/docs/src/pages/docs/_assets/download-button2.png
new file mode 100644
index 0000000000..8f8c683c39
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-button2.png differ
diff --git a/docs/src/pages/docs/_assets/download-button3.png b/docs/src/pages/docs/_assets/download-button3.png
new file mode 100644
index 0000000000..cf05ec45aa
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-button3.png differ
diff --git a/docs/src/pages/docs/_assets/download-icon.png b/docs/src/pages/docs/_assets/download-icon.png
new file mode 100644
index 0000000000..f553e19f27
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-icon.png differ
diff --git a/docs/src/pages/docs/_assets/download-llama2.gif b/docs/src/pages/docs/_assets/download-llama2.gif
new file mode 100644
index 0000000000..18d671209d
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-llama2.gif differ
diff --git a/docs/src/pages/docs/_assets/download-model.gif b/docs/src/pages/docs/_assets/download-model.gif
new file mode 100644
index 0000000000..e10b293c26
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-model.gif differ
diff --git a/docs/src/pages/docs/_assets/download-model2.gif b/docs/src/pages/docs/_assets/download-model2.gif
new file mode 100644
index 0000000000..c69819a1ec
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-model2.gif differ
diff --git a/docs/src/pages/docs/_assets/download-phi.gif b/docs/src/pages/docs/_assets/download-phi.gif
new file mode 100644
index 0000000000..5b2cd6034b
Binary files /dev/null and b/docs/src/pages/docs/_assets/download-phi.gif differ
diff --git a/docs/src/pages/docs/_assets/download.gif b/docs/src/pages/docs/_assets/download.gif
new file mode 100644
index 0000000000..10cb4b4669
Binary files /dev/null and b/docs/src/pages/docs/_assets/download.gif differ
diff --git a/docs/src/pages/docs/_assets/download.png b/docs/src/pages/docs/_assets/download.png
new file mode 100644
index 0000000000..571e44dd30
Binary files /dev/null and b/docs/src/pages/docs/_assets/download.png differ
diff --git a/docs/src/pages/docs/_assets/exp-mode.png b/docs/src/pages/docs/_assets/exp-mode.png
new file mode 100644
index 0000000000..14ecf6bfe7
Binary files /dev/null and b/docs/src/pages/docs/_assets/exp-mode.png differ
diff --git a/docs/src/pages/docs/_assets/experimental.gif b/docs/src/pages/docs/_assets/experimental.gif
new file mode 100644
index 0000000000..11c2d86225
Binary files /dev/null and b/docs/src/pages/docs/_assets/experimental.gif differ
diff --git a/docs/src/pages/docs/_assets/extensions-page.png b/docs/src/pages/docs/_assets/extensions-page.png
new file mode 100644
index 0000000000..74a336769c
Binary files /dev/null and b/docs/src/pages/docs/_assets/extensions-page.png differ
diff --git a/docs/src/pages/docs/_assets/extensions-page2.png b/docs/src/pages/docs/_assets/extensions-page2.png
new file mode 100644
index 0000000000..51863aa99b
Binary files /dev/null and b/docs/src/pages/docs/_assets/extensions-page2.png differ
diff --git a/docs/src/pages/docs/_assets/gpt.gif b/docs/src/pages/docs/_assets/gpt.gif
new file mode 100644
index 0000000000..3972e812f4
Binary files /dev/null and b/docs/src/pages/docs/_assets/gpt.gif differ
diff --git a/docs/src/pages/docs/_assets/gpu-accel.png b/docs/src/pages/docs/_assets/gpu-accel.png
new file mode 100644
index 0000000000..294013fcf1
Binary files /dev/null and b/docs/src/pages/docs/_assets/gpu-accel.png differ
diff --git a/docs/src/pages/docs/_assets/gpu.gif b/docs/src/pages/docs/_assets/gpu.gif
new file mode 100644
index 0000000000..58d1da9704
Binary files /dev/null and b/docs/src/pages/docs/_assets/gpu.gif differ
diff --git a/docs/src/pages/docs/_assets/gpu2.gif b/docs/src/pages/docs/_assets/gpu2.gif
new file mode 100644
index 0000000000..fcacc5e99c
Binary files /dev/null and b/docs/src/pages/docs/_assets/gpu2.gif differ
diff --git a/docs/src/pages/docs/_assets/history.png b/docs/src/pages/docs/_assets/history.png
new file mode 100644
index 0000000000..7112a0d242
Binary files /dev/null and b/docs/src/pages/docs/_assets/history.png differ
diff --git a/docs/src/pages/docs/_assets/http.png b/docs/src/pages/docs/_assets/http.png
new file mode 100644
index 0000000000..afa0aeae02
Binary files /dev/null and b/docs/src/pages/docs/_assets/http.png differ
diff --git a/docs/src/pages/docs/_assets/https.gif b/docs/src/pages/docs/_assets/https.gif
new file mode 100644
index 0000000000..e9ed05b855
Binary files /dev/null and b/docs/src/pages/docs/_assets/https.gif differ
diff --git a/docs/src/pages/docs/_assets/hub.gif b/docs/src/pages/docs/_assets/hub.gif
new file mode 100644
index 0000000000..fad29d5db6
Binary files /dev/null and b/docs/src/pages/docs/_assets/hub.gif differ
diff --git a/docs/src/pages/docs/_assets/hub.png b/docs/src/pages/docs/_assets/hub.png
new file mode 100644
index 0000000000..75b0f575d1
Binary files /dev/null and b/docs/src/pages/docs/_assets/hub.png differ
diff --git a/docs/src/pages/docs/_assets/import-folder.gif b/docs/src/pages/docs/_assets/import-folder.gif
new file mode 100644
index 0000000000..fc78222070
Binary files /dev/null and b/docs/src/pages/docs/_assets/import-folder.gif differ
diff --git a/docs/src/pages/docs/_assets/import-hf.gif b/docs/src/pages/docs/_assets/import-hf.gif
new file mode 100644
index 0000000000..4ca1a8b2aa
Binary files /dev/null and b/docs/src/pages/docs/_assets/import-hf.gif differ
diff --git a/docs/src/pages/docs/_assets/import.png b/docs/src/pages/docs/_assets/import.png
new file mode 100644
index 0000000000..5ca7fff424
Binary files /dev/null and b/docs/src/pages/docs/_assets/import.png differ
diff --git a/docs/src/pages/docs/_assets/import2.png b/docs/src/pages/docs/_assets/import2.png
new file mode 100644
index 0000000000..caf2c022db
Binary files /dev/null and b/docs/src/pages/docs/_assets/import2.png differ
diff --git a/docs/src/pages/docs/_assets/inf.gif b/docs/src/pages/docs/_assets/inf.gif
new file mode 100644
index 0000000000..aaef44380f
Binary files /dev/null and b/docs/src/pages/docs/_assets/inf.gif differ
diff --git a/docs/src/pages/docs/_assets/install-ext.gif b/docs/src/pages/docs/_assets/install-ext.gif
new file mode 100644
index 0000000000..4357ed2365
Binary files /dev/null and b/docs/src/pages/docs/_assets/install-ext.gif differ
diff --git a/docs/src/pages/docs/_assets/install-ext.png b/docs/src/pages/docs/_assets/install-ext.png
new file mode 100644
index 0000000000..ee2ff8453c
Binary files /dev/null and b/docs/src/pages/docs/_assets/install-ext.png differ
diff --git a/docs/src/pages/docs/_assets/install-tensor.gif b/docs/src/pages/docs/_assets/install-tensor.gif
new file mode 100644
index 0000000000..927ee0b9b9
Binary files /dev/null and b/docs/src/pages/docs/_assets/install-tensor.gif differ
diff --git a/docs/src/pages/docs/_assets/install-tensor.png b/docs/src/pages/docs/_assets/install-tensor.png
new file mode 100644
index 0000000000..050075d844
Binary files /dev/null and b/docs/src/pages/docs/_assets/install-tensor.png differ
diff --git a/docs/src/pages/docs/_assets/instructions.gif b/docs/src/pages/docs/_assets/instructions.gif
new file mode 100644
index 0000000000..5ae101c2bf
Binary files /dev/null and b/docs/src/pages/docs/_assets/instructions.gif differ
diff --git a/docs/src/pages/docs/_assets/jan-display.png b/docs/src/pages/docs/_assets/jan-display.png
new file mode 100644
index 0000000000..2fc3610ba8
Binary files /dev/null and b/docs/src/pages/docs/_assets/jan-display.png differ
diff --git a/docs/src/pages/docs/_assets/linux.png b/docs/src/pages/docs/_assets/linux.png
new file mode 100644
index 0000000000..72230191ca
Binary files /dev/null and b/docs/src/pages/docs/_assets/linux.png differ
diff --git a/docs/src/pages/docs/_assets/llama2.gif b/docs/src/pages/docs/_assets/llama2.gif
new file mode 100644
index 0000000000..0c9502aa6e
Binary files /dev/null and b/docs/src/pages/docs/_assets/llama2.gif differ
diff --git a/docs/src/pages/docs/_assets/local-api1.png b/docs/src/pages/docs/_assets/local-api1.png
new file mode 100644
index 0000000000..d636d37ad9
Binary files /dev/null and b/docs/src/pages/docs/_assets/local-api1.png differ
diff --git a/docs/src/pages/docs/_assets/local-api2.png b/docs/src/pages/docs/_assets/local-api2.png
new file mode 100644
index 0000000000..58652775c9
Binary files /dev/null and b/docs/src/pages/docs/_assets/local-api2.png differ
diff --git a/docs/src/pages/docs/_assets/local-api3.png b/docs/src/pages/docs/_assets/local-api3.png
new file mode 100644
index 0000000000..aa83726400
Binary files /dev/null and b/docs/src/pages/docs/_assets/local-api3.png differ
diff --git a/docs/src/pages/docs/_assets/local-api4.png b/docs/src/pages/docs/_assets/local-api4.png
new file mode 100644
index 0000000000..2a66c9c1f5
Binary files /dev/null and b/docs/src/pages/docs/_assets/local-api4.png differ
diff --git a/docs/src/pages/docs/_assets/local-api5.png b/docs/src/pages/docs/_assets/local-api5.png
new file mode 100644
index 0000000000..2dc80c6c70
Binary files /dev/null and b/docs/src/pages/docs/_assets/local-api5.png differ
diff --git a/docs/src/pages/docs/_assets/mac2.png b/docs/src/pages/docs/_assets/mac2.png
new file mode 100644
index 0000000000..aeb4b6bbcc
Binary files /dev/null and b/docs/src/pages/docs/_assets/mac2.png differ
diff --git a/docs/src/pages/docs/_assets/model-anthropic.gif b/docs/src/pages/docs/_assets/model-anthropic.gif
new file mode 100644
index 0000000000..8de07e6258
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-anthropic.gif differ
diff --git a/docs/src/pages/docs/_assets/model-cohere.gif b/docs/src/pages/docs/_assets/model-cohere.gif
new file mode 100644
index 0000000000..2bf5007ff7
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-cohere.gif differ
diff --git a/docs/src/pages/docs/_assets/model-groq.gif b/docs/src/pages/docs/_assets/model-groq.gif
new file mode 100644
index 0000000000..7c6d202807
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-groq.gif differ
diff --git a/docs/src/pages/docs/_assets/model-management1.png b/docs/src/pages/docs/_assets/model-management1.png
new file mode 100644
index 0000000000..a230a5e7e0
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-management1.png differ
diff --git a/docs/src/pages/docs/_assets/model-management2.png b/docs/src/pages/docs/_assets/model-management2.png
new file mode 100644
index 0000000000..86d6a893a3
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-management2.png differ
diff --git a/docs/src/pages/docs/_assets/model-martian.gif b/docs/src/pages/docs/_assets/model-martian.gif
new file mode 100644
index 0000000000..8bf4dbbf5d
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-martian.gif differ
diff --git a/docs/src/pages/docs/_assets/model-mistral.gif b/docs/src/pages/docs/_assets/model-mistral.gif
new file mode 100644
index 0000000000..a266619032
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-mistral.gif differ
diff --git a/docs/src/pages/docs/_assets/model-openai.gif b/docs/src/pages/docs/_assets/model-openai.gif
new file mode 100644
index 0000000000..b20aa9c0e9
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-openai.gif differ
diff --git a/docs/src/pages/docs/_assets/model-parameters.png b/docs/src/pages/docs/_assets/model-parameters.png
new file mode 100644
index 0000000000..ce608ac517
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-parameters.png differ
diff --git a/docs/src/pages/docs/_assets/model-tab.png b/docs/src/pages/docs/_assets/model-tab.png
new file mode 100644
index 0000000000..ea595bf197
Binary files /dev/null and b/docs/src/pages/docs/_assets/model-tab.png differ
diff --git a/docs/src/pages/docs/_assets/model.gif b/docs/src/pages/docs/_assets/model.gif
new file mode 100644
index 0000000000..e5abdf757e
Binary files /dev/null and b/docs/src/pages/docs/_assets/model.gif differ
diff --git a/docs/src/pages/docs/_assets/modelparam.gif b/docs/src/pages/docs/_assets/modelparam.gif
new file mode 100644
index 0000000000..f33cf5ab5b
Binary files /dev/null and b/docs/src/pages/docs/_assets/modelparam.gif differ
diff --git a/docs/src/pages/docs/_assets/models.gif b/docs/src/pages/docs/_assets/models.gif
new file mode 100644
index 0000000000..8dc4d7925f
Binary files /dev/null and b/docs/src/pages/docs/_assets/models.gif differ
diff --git a/docs/src/pages/docs/_assets/mymodels.png b/docs/src/pages/docs/_assets/mymodels.png
new file mode 100644
index 0000000000..d12664b86e
Binary files /dev/null and b/docs/src/pages/docs/_assets/mymodels.png differ
diff --git a/docs/src/pages/docs/_assets/nitro-network.png b/docs/src/pages/docs/_assets/nitro-network.png
new file mode 100644
index 0000000000..5f1f7104eb
Binary files /dev/null and b/docs/src/pages/docs/_assets/nitro-network.png differ
diff --git a/docs/src/pages/docs/_assets/nvidia-1.gif b/docs/src/pages/docs/_assets/nvidia-1.gif
new file mode 100644
index 0000000000..df28abc1fd
Binary files /dev/null and b/docs/src/pages/docs/_assets/nvidia-1.gif differ
diff --git a/docs/src/pages/docs/_assets/nvidia-2.gif b/docs/src/pages/docs/_assets/nvidia-2.gif
new file mode 100644
index 0000000000..4c628884af
Binary files /dev/null and b/docs/src/pages/docs/_assets/nvidia-2.gif differ
diff --git a/docs/src/pages/docs/_assets/phi.gif b/docs/src/pages/docs/_assets/phi.gif
new file mode 100644
index 0000000000..e8cd587d28
Binary files /dev/null and b/docs/src/pages/docs/_assets/phi.gif differ
diff --git a/docs/src/pages/docs/_assets/preserve.png b/docs/src/pages/docs/_assets/preserve.png
new file mode 100644
index 0000000000..459c940e26
Binary files /dev/null and b/docs/src/pages/docs/_assets/preserve.png differ
diff --git a/docs/src/pages/docs/_assets/quick-ask.gif b/docs/src/pages/docs/_assets/quick-ask.gif
new file mode 100644
index 0000000000..f4081ca3c3
Binary files /dev/null and b/docs/src/pages/docs/_assets/quick-ask.gif differ
diff --git a/docs/src/pages/docs/_assets/quick-ask.png b/docs/src/pages/docs/_assets/quick-ask.png
new file mode 100644
index 0000000000..34620569bb
Binary files /dev/null and b/docs/src/pages/docs/_assets/quick-ask.png differ
diff --git a/docs/src/pages/docs/_assets/reset-jan.png b/docs/src/pages/docs/_assets/reset-jan.png
new file mode 100644
index 0000000000..2c20ca6674
Binary files /dev/null and b/docs/src/pages/docs/_assets/reset-jan.png differ
diff --git a/docs/src/pages/docs/_assets/reset.gif b/docs/src/pages/docs/_assets/reset.gif
new file mode 100644
index 0000000000..6ff171f259
Binary files /dev/null and b/docs/src/pages/docs/_assets/reset.gif differ
diff --git a/docs/src/pages/docs/_assets/retrieval.gif b/docs/src/pages/docs/_assets/retrieval.gif
new file mode 100644
index 0000000000..38d3fbb581
Binary files /dev/null and b/docs/src/pages/docs/_assets/retrieval.gif differ
diff --git a/docs/src/pages/docs/_assets/retrieval1.png b/docs/src/pages/docs/_assets/retrieval1.png
new file mode 100644
index 0000000000..8b5d63bb89
Binary files /dev/null and b/docs/src/pages/docs/_assets/retrieval1.png differ
diff --git a/docs/src/pages/docs/_assets/retrieval2.png b/docs/src/pages/docs/_assets/retrieval2.png
new file mode 100644
index 0000000000..2c9443c444
Binary files /dev/null and b/docs/src/pages/docs/_assets/retrieval2.png differ
diff --git a/docs/src/pages/docs/_assets/scheme.png b/docs/src/pages/docs/_assets/scheme.png
new file mode 100644
index 0000000000..fe50d4b6a2
Binary files /dev/null and b/docs/src/pages/docs/_assets/scheme.png differ
diff --git a/docs/src/pages/docs/_assets/search-bar.png b/docs/src/pages/docs/_assets/search-bar.png
new file mode 100644
index 0000000000..b6e5597734
Binary files /dev/null and b/docs/src/pages/docs/_assets/search-bar.png differ
diff --git a/docs/src/pages/docs/_assets/select-openai.gif b/docs/src/pages/docs/_assets/select-openai.gif
new file mode 100644
index 0000000000..3196c4320d
Binary files /dev/null and b/docs/src/pages/docs/_assets/select-openai.gif differ
diff --git a/docs/src/pages/docs/_assets/server-anthropic.gif b/docs/src/pages/docs/_assets/server-anthropic.gif
new file mode 100644
index 0000000000..da684dbd46
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-anthropic.gif differ
diff --git a/docs/src/pages/docs/_assets/server-cohere.gif b/docs/src/pages/docs/_assets/server-cohere.gif
new file mode 100644
index 0000000000..72c9bd883e
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-cohere.gif differ
diff --git a/docs/src/pages/docs/_assets/server-groq.gif b/docs/src/pages/docs/_assets/server-groq.gif
new file mode 100644
index 0000000000..dcb3e03f7f
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-groq.gif differ
diff --git a/docs/src/pages/docs/_assets/server-llama2.gif b/docs/src/pages/docs/_assets/server-llama2.gif
new file mode 100644
index 0000000000..b1ebafc849
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-llama2.gif differ
diff --git a/docs/src/pages/docs/_assets/server-martian.gif b/docs/src/pages/docs/_assets/server-martian.gif
new file mode 100644
index 0000000000..a33dd7f198
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-martian.gif differ
diff --git a/docs/src/pages/docs/_assets/server-mistral.gif b/docs/src/pages/docs/_assets/server-mistral.gif
new file mode 100644
index 0000000000..8fdb6ef0f9
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-mistral.gif differ
diff --git a/docs/src/pages/docs/_assets/server-openai.gif b/docs/src/pages/docs/_assets/server-openai.gif
new file mode 100644
index 0000000000..fd10aa75b2
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-openai.gif differ
diff --git a/docs/src/pages/docs/_assets/server-openai2.gif b/docs/src/pages/docs/_assets/server-openai2.gif
new file mode 100644
index 0000000000..b4338d1221
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-openai2.gif differ
diff --git a/docs/src/pages/docs/_assets/server-phi.gif b/docs/src/pages/docs/_assets/server-phi.gif
new file mode 100644
index 0000000000..4abc916c12
Binary files /dev/null and b/docs/src/pages/docs/_assets/server-phi.gif differ
diff --git a/docs/src/pages/docs/_assets/set-model.gif b/docs/src/pages/docs/_assets/set-model.gif
new file mode 100644
index 0000000000..87de857322
Binary files /dev/null and b/docs/src/pages/docs/_assets/set-model.gif differ
diff --git a/docs/src/pages/docs/_assets/set-tensor.gif b/docs/src/pages/docs/_assets/set-tensor.gif
new file mode 100644
index 0000000000..22e8248825
Binary files /dev/null and b/docs/src/pages/docs/_assets/set-tensor.gif differ
diff --git a/docs/src/pages/docs/_assets/set-url.gif b/docs/src/pages/docs/_assets/set-url.gif
new file mode 100644
index 0000000000..b675b3ea45
Binary files /dev/null and b/docs/src/pages/docs/_assets/set-url.gif differ
diff --git a/docs/src/pages/docs/_assets/settings.png b/docs/src/pages/docs/_assets/settings.png
new file mode 100644
index 0000000000..e53f71d1ec
Binary files /dev/null and b/docs/src/pages/docs/_assets/settings.png differ
diff --git a/docs/src/pages/docs/_assets/shortcut.png b/docs/src/pages/docs/_assets/shortcut.png
new file mode 100644
index 0000000000..b7912f8387
Binary files /dev/null and b/docs/src/pages/docs/_assets/shortcut.png differ
diff --git a/docs/src/pages/docs/_assets/specific-model.gif b/docs/src/pages/docs/_assets/specific-model.gif
new file mode 100644
index 0000000000..2664bcc479
Binary files /dev/null and b/docs/src/pages/docs/_assets/specific-model.gif differ
diff --git a/docs/src/pages/docs/_assets/spell.png b/docs/src/pages/docs/_assets/spell.png
new file mode 100644
index 0000000000..92e9a6d8c8
Binary files /dev/null and b/docs/src/pages/docs/_assets/spell.png differ
diff --git a/docs/src/pages/docs/_assets/ssl.gif b/docs/src/pages/docs/_assets/ssl.gif
new file mode 100644
index 0000000000..b18f075c1d
Binary files /dev/null and b/docs/src/pages/docs/_assets/ssl.gif differ
diff --git a/docs/src/pages/docs/_assets/ssl.png b/docs/src/pages/docs/_assets/ssl.png
new file mode 100644
index 0000000000..970051744b
Binary files /dev/null and b/docs/src/pages/docs/_assets/ssl.png differ
diff --git a/docs/src/pages/docs/_assets/start-anthropic.gif b/docs/src/pages/docs/_assets/start-anthropic.gif
new file mode 100644
index 0000000000..9ca9386553
Binary files /dev/null and b/docs/src/pages/docs/_assets/start-anthropic.gif differ
diff --git a/docs/src/pages/docs/_assets/start-cohere.gif b/docs/src/pages/docs/_assets/start-cohere.gif
new file mode 100644
index 0000000000..b28fde4984
Binary files /dev/null and b/docs/src/pages/docs/_assets/start-cohere.gif differ
diff --git a/docs/src/pages/docs/_assets/start-groq.gif b/docs/src/pages/docs/_assets/start-groq.gif
new file mode 100644
index 0000000000..8e5ced12a1
Binary files /dev/null and b/docs/src/pages/docs/_assets/start-groq.gif differ
diff --git a/docs/src/pages/docs/_assets/start-martian.gif b/docs/src/pages/docs/_assets/start-martian.gif
new file mode 100644
index 0000000000..31a79d16dd
Binary files /dev/null and b/docs/src/pages/docs/_assets/start-martian.gif differ
diff --git a/docs/src/pages/docs/_assets/start-mistral.gif b/docs/src/pages/docs/_assets/start-mistral.gif
new file mode 100644
index 0000000000..1012a651a8
Binary files /dev/null and b/docs/src/pages/docs/_assets/start-mistral.gif differ
diff --git a/docs/src/pages/docs/_assets/start-router.gif b/docs/src/pages/docs/_assets/start-router.gif
new file mode 100644
index 0000000000..ca1737de03
Binary files /dev/null and b/docs/src/pages/docs/_assets/start-router.gif differ
diff --git a/docs/src/pages/docs/_assets/system-mili.png b/docs/src/pages/docs/_assets/system-mili.png
new file mode 100644
index 0000000000..0a0f28592d
Binary files /dev/null and b/docs/src/pages/docs/_assets/system-mili.png differ
diff --git a/docs/src/pages/docs/_assets/system-mili2.png b/docs/src/pages/docs/_assets/system-mili2.png
new file mode 100644
index 0000000000..6135c5a499
Binary files /dev/null and b/docs/src/pages/docs/_assets/system-mili2.png differ
diff --git a/docs/src/pages/docs/_assets/system-monitor.png b/docs/src/pages/docs/_assets/system-monitor.png
new file mode 100644
index 0000000000..04d6749cb5
Binary files /dev/null and b/docs/src/pages/docs/_assets/system-monitor.png differ
diff --git a/docs/src/pages/docs/_assets/system-monitor2.png b/docs/src/pages/docs/_assets/system-monitor2.png
new file mode 100644
index 0000000000..e03dfbf63b
Binary files /dev/null and b/docs/src/pages/docs/_assets/system-monitor2.png differ
diff --git a/docs/src/pages/docs/_assets/system-slider.png b/docs/src/pages/docs/_assets/system-slider.png
new file mode 100644
index 0000000000..409338f703
Binary files /dev/null and b/docs/src/pages/docs/_assets/system-slider.png differ
diff --git a/docs/src/pages/docs/_assets/system-slider2.png b/docs/src/pages/docs/_assets/system-slider2.png
new file mode 100644
index 0000000000..9114d32c32
Binary files /dev/null and b/docs/src/pages/docs/_assets/system-slider2.png differ
diff --git a/docs/src/pages/docs/_assets/tensor.png b/docs/src/pages/docs/_assets/tensor.png
new file mode 100644
index 0000000000..812640de34
Binary files /dev/null and b/docs/src/pages/docs/_assets/tensor.png differ
diff --git a/docs/src/pages/docs/_assets/theme.png b/docs/src/pages/docs/_assets/theme.png
new file mode 100644
index 0000000000..ae14f20546
Binary files /dev/null and b/docs/src/pages/docs/_assets/theme.png differ
diff --git a/docs/src/pages/docs/_assets/thread-settings.png b/docs/src/pages/docs/_assets/thread-settings.png
new file mode 100644
index 0000000000..a7cc60304c
Binary files /dev/null and b/docs/src/pages/docs/_assets/thread-settings.png differ
diff --git a/docs/src/pages/docs/_assets/thread-settings2.png b/docs/src/pages/docs/_assets/thread-settings2.png
new file mode 100644
index 0000000000..b0d728d43c
Binary files /dev/null and b/docs/src/pages/docs/_assets/thread-settings2.png differ
diff --git a/docs/src/pages/docs/_assets/title.png b/docs/src/pages/docs/_assets/title.png
new file mode 100644
index 0000000000..b06f771222
Binary files /dev/null and b/docs/src/pages/docs/_assets/title.png differ
diff --git a/docs/src/pages/docs/_assets/tools.png b/docs/src/pages/docs/_assets/tools.png
new file mode 100644
index 0000000000..8104f6336b
Binary files /dev/null and b/docs/src/pages/docs/_assets/tools.png differ
diff --git a/docs/src/pages/docs/_assets/turn-off.png b/docs/src/pages/docs/_assets/turn-off.png
new file mode 100644
index 0000000000..a16a26a5b9
Binary files /dev/null and b/docs/src/pages/docs/_assets/turn-off.png differ
diff --git a/docs/src/pages/docs/_assets/ui.gif b/docs/src/pages/docs/_assets/ui.gif
new file mode 100644
index 0000000000..946ad8fb9e
Binary files /dev/null and b/docs/src/pages/docs/_assets/ui.gif differ
diff --git a/docs/src/pages/docs/_assets/vulkan.png b/docs/src/pages/docs/_assets/vulkan.png
new file mode 100644
index 0000000000..e8104bcc79
Binary files /dev/null and b/docs/src/pages/docs/_assets/vulkan.png differ
diff --git a/docs/src/pages/docs/_assets/windows.png b/docs/src/pages/docs/_assets/windows.png
new file mode 100644
index 0000000000..69ef6f9a89
Binary files /dev/null and b/docs/src/pages/docs/_assets/windows.png differ
diff --git a/docs/src/pages/docs/_meta.json b/docs/src/pages/docs/_meta.json
new file mode 100644
index 0000000000..231f6a763a
--- /dev/null
+++ b/docs/src/pages/docs/_meta.json
@@ -0,0 +1,44 @@
+{
+ "-- Switcher": {
+ "type": "separator",
+ "title": "Switcher"
+ },
+ "get-started": {
+ "title": "GET STARTED",
+ "type": "separator"
+ },
+ "index": "Overview",
+ "quickstart": {
+ "title": "Quickstart"
+ },
+ "desktop": "Desktop",
+ "data-folder": "Jan Data Folder",
+ "user-guides": {
+ "title": "BASIC USAGE",
+ "type": "separator"
+ },
+ "models": "Models",
+ "tools": "Tools",
+ "assistants": "Assistants",
+ "threads": "Threads",
+ "settings": "Settings",
+ "shortcuts": "Keyboard Shortcuts",
+ "inference-engines": {
+ "title": "MODEL PROVIDER",
+ "type": "separator"
+ },
+ "built-in": "Built-in Models",
+ "remote-models": "Remote APIs",
+ "extensions-separator": {
+ "title": "EXTENSIONS",
+ "type": "separator"
+ },
+ "extensions": "Overview",
+ "installing-extension": "Installing an Extension",
+ "troubleshooting-separator": {
+ "title": "TROUBLESHOOTING",
+ "type": "separator"
+ },
+ "troubleshooting": "Troubleshooting",
+ "error-codes": "Error Codes"
+}
diff --git a/docs/src/pages/docs/assistants.mdx b/docs/src/pages/docs/assistants.mdx
new file mode 100644
index 0000000000..7f2af63e02
--- /dev/null
+++ b/docs/src/pages/docs/assistants.mdx
@@ -0,0 +1,34 @@
+---
+title: Assistants
+description: A step-by-step guide on customizing your assistant.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ manage assistants,
+ assistants,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Assistants
+This guide explains how to set the Assistant instructions in the Jan application.
+
+## Apply the Instructions to All Threads
+To apply the instructions to all the new threads, follow these steps:
+1. Select a **Thread**.
+2. Click the **Assistant** tab.
+3. Toggle the **slider** to ensure these instructions are applied to all new threads. (Activate the **Experimental Mode** feature to enable this option.)
+
+
+![Assistant Slider](./_assets/assistant-slider.png)
+
+
\ No newline at end of file
diff --git a/docs/src/pages/docs/built-in/_meta.json b/docs/src/pages/docs/built-in/_meta.json
new file mode 100644
index 0000000000..0b15c47f2b
--- /dev/null
+++ b/docs/src/pages/docs/built-in/_meta.json
@@ -0,0 +1,10 @@
+{
+ "llama-cpp": {
+ "title": "llama.cpp",
+ "href": "/docs/built-in/llama-cpp"
+ },
+ "tensorrt-llm": {
+ "title": "TensorRT-LLM",
+ "href": "/docs/built-in/tensorrt-llm"
+ }
+}
diff --git a/docs/src/pages/docs/built-in/llama-cpp.mdx b/docs/src/pages/docs/built-in/llama-cpp.mdx
new file mode 100644
index 0000000000..5b7b0453ae
--- /dev/null
+++ b/docs/src/pages/docs/built-in/llama-cpp.mdx
@@ -0,0 +1,137 @@
+---
+title: llama.cpp
+description: A step-by-step guide on how to customize the llama.cpp engine.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Llama CPP integration,
+ llama.cpp Engine,
+ Intel CPU,
+ AMD CPU,
+ NVIDIA GPU,
+ AMD GPU Radeon,
+ Apple Silicon,
+ Intel Arc GPU,
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout, Steps } from 'nextra/components'
+
+# llama.cpp (Default)
+
+## Overview
+
+Jan has a default [C++ inference server](https://github.com/janhq/cortex) built on top of [llama.cpp](https://github.com/ggerganov/llama.cpp). This server provides an OpenAI-compatible API, queues, scaling, and additional features on top of the wide capabilities of `llama.cpp`.
+
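+Below is a minimal sketch of calling that OpenAI-compatible API with `curl` once the local server is running. The address, port `1337`, and model id used here are assumptions for illustration only; use the values shown in your own Local API Server settings and model list.
+
+```bash
+# Hypothetical example: chat with a locally loaded model through the
+# OpenAI-compatible endpoint. Adjust the host, port, and model id to match
+# what your Jan Local API Server screen actually shows.
+curl http://localhost:1337/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+        "model": "tinyllama-1.1b",
+        "messages": [{"role": "user", "content": "Hello!"}]
+      }'
+```
+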
+## llama.cpp Engine
+
+This guide shows you how to initialize the `llama.cpp` engine, download and install the required dependencies, and start chatting with a model using the `llama.cpp` engine.
+
+## Prerequisites
+- Mac Intel:
+ - Make sure you're using an Intel-based Mac. For a complete list of supported Intel CPUs, please see [here](https://en.wikipedia.org/wiki/MacBook_Pro_(Intel-based)).
+ - For Mac Intel, smaller models are recommended.
+- Mac Silicon:
+ - Make sure you're using a Mac with Apple Silicon. For a complete list of supported Apple Silicon chips, please see [here](https://en.wikipedia.org/wiki/Apple_Silicon).
+ - Choosing a model size appropriate for your hardware is recommended on Apple Silicon.
+
+ Jan uses the Apple GPU with Metal by default for acceleration. Apple ANE is not supported yet.
+
+- Windows:
+ - Ensure that you have **Windows with x86_64** architecture.
+- Linux:
+ - Ensure that you have **Linux with x86_64** architecture.
+
+#### GPU Acceleration Options
+Enable the GPU acceleration option within the Jan application by following the [Installation Setup](/docs/desktop-installation) guide.
+
+## Step-by-step Guide
+
+### Step 1: Open the `model.json`
+1. Navigate to the **Advanced Settings**.
+
+![Settings](../_assets/advance-set.png)
+
+2. On the **Jan Data Folder** click the **folder icon (📂)** to access the data.
+
+![Jan Data Folder](../_assets/data-folder.png)
+
+3. Select **models** folder > Click the **name** of the model folder that you want to modify > click the `model.json`.
+4. This will open up a `model.json`. For example, the `model.json` of `TinyLlama Chat 1.1B Q4` is shown below:
+```json
+{
+ "sources": [
+ {
+ "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+ "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+ }
+ ],
+ "id": "tinyllama-1.1b",
+ "object": "model",
+ "name": "TinyLlama Chat 1.1B Q4",
+ "version": "1.0",
+ "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 4096,
+ "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
+ "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+ },
+ "parameters": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 2048,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "TinyLlama",
+ "tags": [
+ "Tiny",
+ "Foundation Model"
+ ],
+ "size": 669000000
+ },
+ "engine": "nitro"
+}
+```
+### Step 2: Modify the `model.json`
+1. Modify the model's engine settings under the settings array. You can modify the settings with the following parameters:
+
+| Parameter | Type | Description |
+| --------------- | ----------- | ---------------------------------------------------------------------------------------------------------------------------------- |
+| `ctx_len` | **Integer** | Sets the maximum context length available to the model. The default value is `2048` (_Maximum_: `4096`, _Minimum_: `1`). |
+| `prompt_template` | **String** | Defines the template used to format prompts |
+| `model_path` | **String** | Specifies the path to the model `.GGUF` file. |
+| `ngl` | **Integer** | Determines GPU layer usage. The default value is `100`. |
+| `cpu_threads` | **Integer** | Determines CPU inference threads, limited by hardware and OS. (_Maximum_ determined by system) |
+| `cont_batching` | **Integer** | Controls continuous batching, enhancing throughput for LLM inference. |
+| `embedding` | **Integer** | Enables embedding utilization for tasks like document-enhanced chat in RAG-based applications. |
+2. Save the `model.json` file.
+
+ If you use a different model, you must set it up again, as these changes only affect the selected model. A command-line sketch of the same kind of edit is shown below.
+
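+The following `jq` one-liner is a hypothetical sketch of that edit. The path and values are assumptions for illustration; point it at the `model.json` of the model you actually want to modify.
+
+```bash
+# Hypothetical example: raise the context length and GPU layer count for one
+# model. jq writes to a temporary file first, then replaces the original.
+MODEL_JSON=~/jan/models/tinyllama-1.1b/model.json   # assumed path
+jq '.settings.ctx_len = 4096 | .settings.ngl = 100' "$MODEL_JSON" > /tmp/model.json \
+  && mv /tmp/model.json "$MODEL_JSON"
+```
+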
+### Step 3: Start the Model
+1. Restart the Jan application to apply your settings.
+2. Navigate to the **Threads**.
+3. Chat with your model.
+
+
+ - To utilize the embedding feature, include the JSON parameter `"embedding": true`. It will enable Nitro to process inferences with embedding capabilities. Please refer to the [Embedding in the Nitro documentation](https://nitro.jan.ai/features/embed) for a more detailed explanation.
+ - To utilize the continuous batching feature for boosting throughput and minimizing latency in large language model (LLM) inference, include `cont_batching: true`. For details, please refer to the [Continuous Batching in the Nitro documentation](https://nitro.jan.ai/features/cont-batch).
+
+
+
+
+ If you have questions, please join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions.
+
diff --git a/docs/src/pages/docs/built-in/tensorrt-llm.mdx b/docs/src/pages/docs/built-in/tensorrt-llm.mdx
new file mode 100644
index 0000000000..b165dedc9b
--- /dev/null
+++ b/docs/src/pages/docs/built-in/tensorrt-llm.mdx
@@ -0,0 +1,91 @@
+---
+title: TensorRT-LLM
+description: A step-by-step guide on customizing the TensorRT-LLM engine.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ TensorRT-LLM Engine,
+ TensorRT,
+ tensorRT,
+ engine,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# TensorRT-LLM
+
+## Overview
+
+This guide walks you through installing Jan's official [TensorRT-LLM Engine](https://github.com/janhq/nitro-tensorrt-llm). This engine uses [Cortex-TensorRT-LLM](https://github.com/janhq/cortex.tensorrt-llm) as the AI engine instead of the default [Cortex-Llama-CPP](https://github.com/janhq/cortex). It includes an efficient C++ server that executes the [TRT-LLM C++ runtime](https://nvidia.github.io/TensorRT-LLM/gpt_runtime.html) natively. It also includes features and performance improvements like OpenAI compatibility, tokenizer improvements, and queues.
+
+
+ This feature is only available for Windows users. Linux is coming soon.
+
+
+### Pre-requisites
+
+- A **Windows** PC.
+- **NVIDIA GPU(s)**: Ada or Ampere series (e.g., RTX 40-series and 30-series cards). More will be supported soon.
+- Sufficient disk space for the TensorRT-LLM models and data files (space requirements vary depending on the model size).
+
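+Before installing, you may want to confirm which GPU and driver version you have. A quick check, assuming the NVIDIA driver is already installed, is:
+
+```bash
+# List the GPU name and driver version reported by the NVIDIA driver.
+nvidia-smi --query-gpu=name,driver_version --format=csv
+```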
+
+
+### Step 1: Install TensorRT-Extension
+
+1. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](../_assets/settings.png)
+
+2. Select the **TensorRT-LLM** under the **Model Provider** section.
+
+![Click Tensor](../_assets/tensor.png)
+
+3. Click **Install** to install the required dependencies to use TensorRT-LLM.
+
+![Install Extension](../_assets/install-tensor.png)
+
+4. Check that the files are correctly downloaded.
+
+```bash
+ls ~/jan/data/extensions/@janhq/tensorrt-llm-extension/dist/bin
+# Your Extension Folder should now include `nitro.exe`, among other artifacts needed to run TRT-LLM
+```
+
+### Step 2: Download a Compatible Model
+
+TensorRT-LLM can only run models in `TensorRT` format. These models, also known as "TensorRT engines", are prebuilt for each target OS and GPU architecture.
+
+We offer a handful of precompiled models for Ampere and Ada cards that you can immediately download and play with:
+
+1. Restart the application and go to the Hub.
+2. Look for models with the `TensorRT-LLM` label in the recommended models list > Click **Download**.
+
+
+ This step might take some time. 🙏
+
+
+![image](https://hackmd.io/_uploads/rJewrEgRp.png)
+
+3. Click **Download** to download the model.
+
+### Step 3: Configure Settings
+
+1. Navigate to the Thread section.
+2. Select the model that you have downloaded.
+3. Customize the model's default parameters to control how Jan runs TensorRT-LLM.
+
+ Please see [here](/docs/models/model-parameters) for more detailed model parameters.
+
+
+![Specific Conversation](../_assets/model-parameters.png)
+
+
\ No newline at end of file
diff --git a/docs/src/pages/docs/data-folder.mdx b/docs/src/pages/docs/data-folder.mdx
new file mode 100644
index 0000000000..8d79997596
--- /dev/null
+++ b/docs/src/pages/docs/data-folder.mdx
@@ -0,0 +1,250 @@
+---
+title: Jan Data Folder
+description: Discover the Structure of Jan Data.
+sidebar_position: 2
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout, Steps } from 'nextra/components'
+
+# Jan Data Folder
+Jan stores your data locally in your own filesystem in a universal file format (JSON). We build for privacy by default and do not collect or sell your data.
+
+This guide helps you understand where and how this data is stored.
+
+## Open the Data Folder
+
+To open the Jan data folder from the app:
+1. Click the System monitor button on your Jan app.
+2. Click the App Log button.
+3. This redirects you to the Jan data folder.
+
+```bash
+# Windows
+~/AppData/Roaming/Jan/data
+
+# Mac
+~/Library/Application\ Support/Jan/data
+
+# Linux
+## Custom installation directory
+$XDG_CONFIG_HOME = /home/username/custom_config
+
+or
+
+## Default installation directory
+~/.config/Jan/data
+
+```
+
+## Folder Structure
+The Jan app data folder should have the following structure:
+
+Jan is stored in the root `~/jan` by default.
+
+```yaml
+/assistants
+ /jan
+ assistant.json
+/extensions
+ extensions.json
+ /@janhq
+ /extension_A
+ package.json
+/logs
+ /app.txt
+/models
+ /model_A
+ model.yaml
+ model_A.yaml
+/settings
+ settings.json
+ /@janhq
+ /extension_A_Settings
+ settings.json
+/themes
+ /dark-dimmed
+ /joi-dark
+ /joi-light
+ /night-blue
+/threads
+ /jan_thread_A
+ messages.jsonl
+ thread.json
+ messages.jsonl
+```
+### `jan/` (The Root Directory)
+
+This is the primary directory where all files related to Jan are stored. It typically resides in the user's home directory.
+
+### `assistants/`
+
+Stores configuration files for various AI assistants. Each assistant within this directory can have different settings.
+
+- **Default Assistant**: Located in `/assistants/jan/`, it includes an `assistant.json` that configures the default settings and capabilities. A sample `assistant.json` is shown below:
+
+```json
+{
+ "avatar": "",
+ "id": "jan",
+ "object": "assistant",
+ "created_at": 1715132389207,
+ "name": "Jan",
+ "description": "A default assistant that can use all downloaded models",
+ "model": "*",
+ "instructions": ""
+}
+
+```
+
+Each parameter in the file is defined as follows:
+
+| Parameter | Description | Type | Default Value |
+|-------------------------|-----------------------------------------------------------------------------------------------|---------|----------------------------------------------------|
+| id | Identifier for the assistant, defaults to "jan". | string | jan |
+| avatar | The avatar image for the assistant. | string | None |
+| object | Specifies the assistant type in OpenAI-compatible API responses. | string | None |
+| created_at | Timestamp indicating when the assistant was created. | string | None |
+| name | Display name for the assistant. | string | Jan |
+| description | A description of the assistant's role. | string | A default assistant that can use all downloaded models. |
+| model | Defines which models the assistant can use, with * indicating all models are available. | string | * |
+| instructions | Default instructions provided to new threads. | string | None |
+| file_ids | Field for referencing file IDs in OpenAI-compatible responses. | string | None |
+| tools | List of tools available for the assistant, with only the "retrieval" tool supported so far. | array | retrieval |
+| type | Specifies the type of tool, default is "retrieval". | string | retrieval |
+| enabled | Indicates whether the tool is enabled by default.| boolean | true |
+| useTimeWeightedRetriever | Controls if the time-weighted retrieval feature is enabled. | boolean | false |
+| settings | Configuration settings for the tool (retrieval tool in this case). | object | None |
+| top_k | Number of top results to return in retrieval, with a default of 2. | number | 2 |
+| chunk_size | Defines the size of text chunks to process for the retrieval tool. | number | 1024 |
+| chunk_overlap | Determines the amount of overlap between text chunks in the retrieval process. | number | 64 |
+| retrieval_template | Template for formatting the retrieval tool's responses to queries. | string | None |
+
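+The retrieval-related fields above (`tools`, `settings`, `top_k`, `chunk_size`, `chunk_overlap`) do not appear in the minimal sample, but they live in the same file. A hypothetical sketch of adding them with `jq`, using the default values listed in the table and assuming the default `~/jan` data folder, is:
+
+```bash
+# Hypothetical example: append a "retrieval" tool entry to the default
+# assistant.json. The path assumes the default ~/jan data folder.
+ASSISTANT=~/jan/assistants/jan/assistant.json
+jq '.tools = [{
+      "type": "retrieval",
+      "enabled": true,
+      "useTimeWeightedRetriever": false,
+      "settings": { "top_k": 2, "chunk_size": 1024, "chunk_overlap": 64 }
+    }]' "$ASSISTANT" > /tmp/assistant.json && mv /tmp/assistant.json "$ASSISTANT"
+```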
+
+### `extensions/`
+
+Extensions enhance Jan's functionality by adding new capabilities or integrating external services.
+
+- **@janhq**: The root folder for extensions published under an organization scope. Extensions with organization-level names (e.g., `@janhq/monitoring-extension`) are stored here. If there is no organization, the folder holds the extension directly (e.g., `example-extension`).
+
+- **extensions.json**: A file that lists all installed extensions and their metadata, taken from each extension’s `package.json`. It helps avoid scanning all extension folders, improving performance.
+
+### `logs/`
+
+Logs from the application are stored here. This is useful for troubleshooting and monitoring the application's behavior over time. The file `/logs/app.txt` uses prefixes to indicate the source of the logs:
+- **[APP]**: App logs
+- **[SERVER]**: API server logs
+- **[SPECS]**: Hardware information logs
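+
+These prefixes make it easy to filter the log from a terminal, for example (assuming the default `~/jan` data folder):
+
+```bash
+# Show only the API server entries from the combined application log.
+grep '\[SERVER\]' ~/jan/logs/app.txt
+```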
+
+### `models/`
+
+Stores the AI models that the assistants use to process requests and generate responses.
+
+- **Model Configurations**: Each model directory, such as `/models/modelA/`, contains a `model.json` with settings specific to that model.
+
+
+To see the full list of `model.json` parameters, please see [here](/docs/models/model-parameters).
+
+
+### `settings/`
+
+General settings for the application are stored here, separate from individual assistant or engine configurations.
+
+- **Extension-specific Settings**: Additional settings for extensions are stored in respective subdirectories under `/settings/@janhq/`. Each parameter in the file is defined as follows:
+
+| Parameter | Description |
+|-----------------|----------------------------------------------------------------------|
+| `key` | The setting key. |
+| `title` | The setting title. |
+| `description` | The setting description. |
+| `controllerType`| The type of setting component (checkbox, input, slider, etc.). |
+| `controllerProps`| Properties of the controller (e.g., value, placeholder, textAlign). |
+| `extensionName` | The extension ID, used to map with the parent extension. |
+
+- **General Settings**: The `settings.json` in the `/settings/` directory holds application settings related to the GPU acceleration. Each parameter in the file is defined as follows:
+
+| Parameter | Description |
+|----------------------|---------------------------------------------------------------------------------------------|
+| `notify` | Whether notifications are enabled (true/false). |
+| `run_mode` | The mode the application is running in (e.g., "cpu"). |
+| `nvidia_driver.exist`| Whether the NVIDIA driver is present (true/false). |
+| `nvidia_driver.version`| The version of the installed NVIDIA driver. |
+| `cuda.exist` | Whether CUDA is available (true/false). |
+| `cuda.version` | The version of CUDA installed. |
+| `gpus[0].id` | The ID of the GPU (e.g., "0"). |
+| `gpus[0].vram` | The amount of VRAM for the GPU (in MB). |
+| `gpus[0].name` | The name of the GPU (e.g., "NVIDIA GeForce RTX 3050 Laptop GPU"). |
+| `gpus[0].arch` | The architecture of the GPU (e.g., "ampere"). |
+| `gpu_highest_vram` | The ID of the GPU with the highest VRAM. |
+| `gpus_in_use` | The list of GPU IDs currently in use (e.g., "0"). |
+| `is_initial` | Indicates whether it's the initial run (true/false). |
+| `vulkan` | Whether Vulkan support is available (true/false). |
+
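+As a quick way to inspect a few of these values, you can pretty-print them with `jq` (the path assumes the default `~/jan` data folder):
+
+```bash
+# Print the run mode, GPUs currently in use, and Vulkan flag from settings.json.
+jq '{run_mode, gpus_in_use, vulkan}' ~/jan/settings/settings.json
+```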
+
+
+
+### `themes/`
+
+The `themes` directory contains different visual themes for the application, allowing customization of the user interface. Each theme directory contains a `theme.json` file with the following parameters:
+
+| Parameter | Description |
+|----------------------|----------------------------------------------------------------------------------|
+| `id` | The theme's ID. |
+| `displayName` | Theme display name, as seen in theme settings. |
+| `reduceTransparent` | Setting to reduce transparency of the window/background. |
+| `nativeTheme` | Indicates whether the theme depends on the OS's light/dark settings. |
+| `variables` | Contains all possible component configurations. |
+
+### `threads/`
+
+Thread history is kept in this directory, making it easy to review past interactions. Each session or thread is stored in its own subdirectory, such as `/threads/jan_unixstamp/`, with `messages.jsonl` and `thread.json` files as described below:
+
+- `messages.jsonl`: An array of OpenAI-compatible message objects belonging to the thread. For example:
+
+```jsonl
+ {"id":"01J6Y6FH8PFTHQB5PNJTHEN27C","thread_id":"jan_1725437954","type":"Thread","role":"assistant","content":
+ [{"type":"text","text":{"value":"Hello! Is there something I can help you with or would you like to chat?","annotations":
+ []}}],"status":"ready","created":1725442802966,"updated":1725442802966,"object":"thread.message"}
+```
+
+- `thread.json`: The thread's metadata that contains the following parameters:
+
+| Parameter | Description |
+|----------------|---------------------------------------------------------------------------------------------------------------------------------------------|
+| `id` | Thread's ID (can be generated by the folder name). |
+| `object` | "thread" (OpenAI-compatible field). |
+| `title` | Thread's title (editable in the GUI from the Thread List on the left panel). |
+| `assistants` | Contains cloned assistant metadata and specialized settings for the thread. Includes all assistant settings mentioned under the Jan assistant section. |
+| `model` | The selected model and its settings/parameters for the thread. Changes made by users to thread settings are written here, rather than in model.json. Also contains the ID and engine of the selected model for quick querying by extensions. |
+| `metadata` | Additional thread data, such as `lastMessage`, which provides GUI information but does not use OpenAI-compatible fields. |
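+
+For example, to list the titles of all stored threads with `jq` (assuming the default `~/jan` data folder):
+
+```bash
+# Print each thread's title from its thread.json metadata file.
+jq -r '.title' ~/jan/threads/*/thread.json
+```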
+
+## Open the Data Folder from Settings
+
+To open the Jan data folder, follow the steps in the [Settings](/docs/settings#access-the-jan-data-folder) guide.
+
+## Delete Jan Data Folder
+
+If you have uninstalled the Jan app, you may also want to delete the Jan data folder. You can automatically remove this folder during uninstallation by selecting **OK** when prompted.
+
+![Delete Data Folder](./_assets/delete-data.png)
+
+If you missed this step and need to delete the folder manually, please follow these instructions:
+
+1. Go to the root data folder in your Users directory.
+2. Locate the Jan data folder.
+3. Delete the folder manually.
\ No newline at end of file
diff --git a/docs/src/pages/docs/desktop.mdx b/docs/src/pages/docs/desktop.mdx
new file mode 100644
index 0000000000..c7f0de4c9e
--- /dev/null
+++ b/docs/src/pages/docs/desktop.mdx
@@ -0,0 +1,35 @@
+---
+title: Desktop Installation
+description: Jan is a ChatGPT-alternative that runs on your computer, with a local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Hardware Setup,
+ GPU,
+ ]
+---
+
+import { Cards, Card } from 'nextra/components'
+import childPages from './desktop/_meta.json';
+
+# Desktop Installation
+
+
+<Cards
+  children={Object.entries(childPages).map(([key, page]) => (
+    <Card key={key} title={page.title} href={page.href} />
+  ))}
+/>
\ No newline at end of file
diff --git a/docs/src/pages/docs/desktop/_meta.json b/docs/src/pages/docs/desktop/_meta.json
new file mode 100644
index 0000000000..5cc930af72
--- /dev/null
+++ b/docs/src/pages/docs/desktop/_meta.json
@@ -0,0 +1,14 @@
+{
+ "mac": {
+ "title": "Mac",
+ "href": "/docs/desktop/mac"
+ },
+ "windows": {
+ "title": "Windows",
+ "href": "/docs/desktop/windows"
+ },
+ "linux": {
+ "title": "Linux",
+ "href": "/docs/desktop/linux"
+ }
+}
diff --git a/docs/src/pages/docs/desktop/linux.mdx b/docs/src/pages/docs/desktop/linux.mdx
new file mode 100644
index 0000000000..d82ca4503d
--- /dev/null
+++ b/docs/src/pages/docs/desktop/linux.mdx
@@ -0,0 +1,323 @@
+---
+title: Linux
+description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ "desktop"
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout } from 'nextra/components'
+import FAQBox from '@/components/FaqBox'
+
+
+# Linux Installation
+To install Jan desktop on Linux, follow the steps below:
+## Compatibility
+
+Ensure that your system meets the following requirements to use Jan effectively:
+
+
+- Debian-based (Supports `.deb` and `AppImage` )
+ - Ubuntu-based
+ - Ubuntu Desktop LTS (official)/ Ubuntu Server LTS (only for server)
+ - Edubuntu (Mainly desktop)
+ - Kubuntu (Desktop only)
+ - Lubuntu (Both desktop and server, though mainly desktop)
+ - Ubuntu Budgie (Mainly desktop)
+ - Ubuntu Cinnamon (Desktop only)
+ - Ubuntu Kylin (Both desktop and server)
+ - Ubuntu MATE (Desktop only)
+- Pacman-based
+ - Arch Linux based
+ - Arch Linux (Mainly desktop)
+ - SteamOS (Desktop only)
+- RPM-based (Supports `.rpm` and `AppImage` )
+- Fedora-based
+ - RHEL-based (Server only)
+- openSUSE (Both desktop and server)
+
+
+ - Please check whether your Linux distribution supports desktop, server, or both environments.
+
+
+
+
+
+
+
+- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Haswell processors (Q2 2013) and newer.
+- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors.
+
+
+
+- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Excavator processors (Q2 2015) and newer.
+
+
+
+
+- 8GB for running up to 3B models (int4).
+- 16GB for running up to 7B models (int4).
+- 32GB for running up to 13B models (int4).
+
+
+We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance.
+
+
+
+
+- 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
+- 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
+- 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
+
+
+Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended.
+
+
+
+
+- At least 10GB for app storage and model download.
+
+
+## Prerequisites
+
+- **System Libraries**:
+ - glibc 2.27 or higher. You can verify this by running `ldd --version`.
+ - Install gcc-11, g++-11, cpp-11, or later versions. Refer to the [Ubuntu installation guide](https://gcc.gnu.org/projects/cxx-status.html#cxx17) for assistance.
+- **Post-Installation Actions**:
+ - Add CUDA libraries to the `LD_LIBRARY_PATH` per the instructions in the [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions).
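+
+A minimal sketch of checking these prerequisites and exporting the CUDA library path is shown below. The CUDA location `/usr/local/cuda` is an assumption; substitute your actual install path.
+
+```bash
+# Check the glibc and gcc versions required above.
+ldd --version
+gcc --version
+
+# Assumed CUDA install location; add its libraries to the loader path.
+export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+```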
+
+## Installing Jan
+
+To install Jan, follow the steps below:
+
+### Step 1: Download the Jan Application
+
+Jan provides two types of releases:
+
+
+#### Stable Releases
+
+Stable releases are the official builds of Jan. You can download the latest stable release via the following:
+
+- **Official Website**: [https://jan.ai](https://jan.ai/)
+- **Jan GitHub repository**: [Github](https://github.com/janhq/jan/releases)
+
+
+Make sure to verify the URL to ensure that it's the official Jan website and GitHub repository.
+
+
+
+#### Nightly Releases
+
+The nightly release allows you to test out new features and get a sneak peek at what might be included in future stable releases. You can download this version via:
+
+- **Jan GitHub repository**: [Github](https://github.com/janhq/jan/actions/workflows/jan-electron-build-nightly.yml)
+
+
+Keep in mind that this build might crash frequently and may contain bugs!
+
+
+
+For Linux, Jan provides two types of downloads:
+
+1. **Ubuntu**: `.deb`
+2. **Fedora**: `.AppImage`
+
+### Step 2: Install the Jan Application
+
+Here are the steps to install Jan on Linux based on your Linux distribution:
+
+
+### Ubuntu
+Install Jan using the following command:
+
+
+
+```
+# Install Jan using dpkg
+sudo dpkg -i jan-linux-amd64-{version}.deb
+
+```
+
+
+
+```bash
+# Install Jan using apt-get
+sudo apt-get install ./jan-linux-amd64-{version}.deb
+# where jan-linux-amd64-{version}.deb is the path to the Jan package
+```
+
+
+
+
+### Fedora
+
+1. Make the AppImage executable using the following command:
+
+```
+ chmod +x jan-linux-x86_64-{version}.AppImage
+
+```
+
+2. Run the AppImage file using the following command:
+
+```
+ ./jan-linux-x86_64-{version}.AppImage
+
+```
+
+
+## Data Folder
+
+By default, Jan is installed in the following directory:
+
+```
+# Custom installation directory
+$XDG_CONFIG_HOME = /home/username/custom_config
+
+or
+
+# Default installation directory
+~/.config/Jan/data
+
+```
+
+
+- Please see the [Jan Data Folder](/docs/data-folder) for more details about the data folder structure.
+
+
+## GPU Acceleration
+
+Once Jan is installed and you have a GPU, you can use your GPU to accelerate the model's performance.
+
+### NVIDIA GPU
+
+To enable the use of your NVIDIA GPU in the Jan app, follow the steps below:
+
+
+Ensure that you have installed the following to use NVIDIA GPU:
+- NVIDIA GPU with CUDA Toolkit 11.7 or higher.
+- NVIDIA driver 470.63.01 or higher.
+
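+You can verify both requirements from a terminal. These commands assume the NVIDIA driver and CUDA toolkit are already on your PATH:
+
+```bash
+# Driver version (should be 470.63.01 or higher).
+nvidia-smi --query-gpu=driver_version --format=csv,noheader
+
+# CUDA toolkit version (should be 11.7 or higher).
+nvcc --version
+```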
+
+
+1. Open Jan application.
+2. Go to **Settings** -> **Advanced Settings** -> **GPU Acceleration**.
+3. Enable and choose the NVIDIA GPU you want.
+4. A success notification saying **Successfully turned on GPU acceleration** will appear when GPU acceleration is activated.
+
+
+While **Vulkan** can enable Nvidia GPU acceleration in the Jan app, **CUDA** is recommended for faster performance.
+
+
+
+### AMD GPU
+
+To enable the use of your AMD GPU in the Jan app, you need to activate the Vulkan support first by following the steps below:
+
+1. Open Jan application.
+2. Go to **Settings** -> **Advanced Settings** -> enable the **Experimental Mode**.
+3. Enable the **Vulkan Support** under the **GPU Acceleration**.
+4. Enable the **GPU Acceleration** and choose the GPU you want to use.
+5. A success notification saying **Successfully turned on GPU acceleration** will appear when GPU acceleration is activated.
+
+### Intel Arc GPU
+
+To enable the use of your Intel Arc GPU in the Jan app, you need to activate the Vulkan support first by following the steps below:
+
+1. Open Jan application.
+2. Go to **Settings** -> **Advanced Settings** -> enable the **Experimental Mode**.
+3. Enable the **Vulkan Support** under the **GPU Acceleration**.
+4. Enable the **GPU Acceleration** and choose the GPU you want to use.
+5. A success notification saying **Successfully turned on GPU acceleration** will appear when GPU acceleration is activated.
+
+
+## Uninstalling Jan
+
+To uninstall Jan, follow the steps below:
+
+
+
+### Ubuntu
+
+```bash
+# Uninstall Jan
+sudo apt-get remove jan
+
+# Remove the Jan data folder
+rm -rf Jan
+
+# Delete the application data
+rm -rf ~/.config/Jan/data
+
+# Delete the application cache
+rm -rf ~/.config/Jan/cache
+```
+
+
+### Fedora
+
+```bash
+# Uninstall Jan
+sudo dnf remove jan
+
+# Delete the application data
+rm -rf ~/.config/Jan/data
+
+# Delete the application cache
+rm -rf ~/.config/Jan/cache
+```
+
+
+
+
+The deleted Data Folder cannot be restored.
+
+
+
+{/* ## FAQs
+
+
+Nightly Releases allow you to test new features and previews of upcoming stable releases. You can download them from Jan's GitHub repository. However, remember that these builds might contain bugs and crash frequently.
+
+
+Yes, you can move the Jan data folder.
+
+
+Depending on your GPU type (NVIDIA, AMD, or Intel), follow the respective instructions provided in the [GPU Acceleration](https://www.notion.so/docs/desktop/windows#gpu-acceleration) section above.
+
+
+No, it cannot be restored once you delete the Jan data folder during uninstallation.
+
+
+Yes, `.AppImage` is designed to be distribution-agnostic, meaning it can run on various Linux distributions without requiring installation. You can use the Jan `.AppImage` on any Linux distribution that supports the `AppImage` format.
+
+
+No, `.deb` files are specifically intended for Debian-based distributions and may not be compatible with other Linux distributions.
+
+
+Warning: If you have any trouble during installation, please see our [Troubleshooting](/docs/troubleshooting) guide to resolve your problem.
+
+
+*/}
\ No newline at end of file
diff --git a/docs/src/pages/docs/desktop/mac.mdx b/docs/src/pages/docs/desktop/mac.mdx
new file mode 100644
index 0000000000..b1f952da75
--- /dev/null
+++ b/docs/src/pages/docs/desktop/mac.mdx
@@ -0,0 +1,187 @@
+---
+title: Mac
+description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ "desktop"
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout } from 'nextra/components'
+import FAQBox from '@/components/FaqBox'
+
+
+# Mac Installation
+Jan has been developed as a Mac Universal application, allowing it to run natively on both Apple Silicon and Intel-based Macs.
+
+## Compatibility
+
+Ensure that your system meets the following requirements to use Jan effectively:
+
+
+
+
+- macOS 13.6 or higher.
+
+
+- 8GB for running up to 3B models.
+- 16GB for running up to 7B models.
+- 32GB for running up to 13B models.
+
+
+- At least 10GB for app and model download.
+
+
+
+
+
+
+- macOS 13.6 or higher.
+
+
+- 8GB for running up to 3B models.
+- 16GB for running up to 7B models.
+- 32GB for running up to 13B models.
+
+Apple Silicon Macs leverage Metal for GPU acceleration, providing faster performance than Intel Macs, which rely solely on CPU processing.
+
+
+
+
+- At least 10GB for app and model download.
+
+
+
+
+## Installing Jan
+
+To install Jan, follow the steps below:
+
+### Step 1: Download the Jan Application
+
+Jan provides two types of releases:
+
+
+#### Stable Releases
+
+Please download Jan from official distributions, or build it from source.
+
+- **Official Website**: [https://jan.ai](https://jan.ai/)
+- **Jan GitHub repository**: [Github](https://github.com/janhq/jan/releases)
+
+
+Make sure to verify the URL to ensure that it's the official Jan website and GitHub repository.
+
+
+
+#### Nightly Releases
+
+Nightly Releases let you test out new features, which may be buggy:
+
+- **Jan GitHub repository**: [Github](https://github.com/janhq/jan/actions/workflows/jan-electron-build-nightly.yml)
+
+
+Keep in mind that this build might crash frequently and may contain bugs!
+
+
+
+### Step 2: Install the Jan Application
+
+1. Once you have downloaded the Jan app `.dmg` file, open the file.
+2. Drag the application icon to the Applications folder shortcut.
+3. Wait for the installation process.
+4. Once installed, you can access Jan on your machine.
+
+#### Install Jan with Homebrew
+
+You can also install Jan using the following Homebrew command:
+
+```bash
+brew install --cask jan
+```
+
+
+- Ensure that you have installed Homebrew and its dependencies.
+- Homebrew package installation is currently limited to **Apple Silicon Macs**, with upcoming support for Windows and Linux.
+
+
+## Data Folder
+
+By default, Jan is installed in the following directory:
+
+```sh
+# Default installation directory
+~/Library/Application\ Support/Jan/data
+```
+
+
+- Please see the [Jan Data Folder](/docs/data-folder) for more details about the data folder structure.
+
+
+## Metal Acceleration
+
+Jan is designed to work well on Apple Silicon Macs, using `llama.cpp` as its main engine for processing AI tasks efficiently. It **automatically uses [Metal](https://developer.apple.com/documentation/metal)**, Apple's graphics and compute API, for GPU acceleration, so you don't need to turn on this feature manually.
+
+
+💡 Metal, used for GPU acceleration, is not supported on Intel-based Mac devices.
+
+
+
+## Uninstalling Jan
+
+To uninstall Jan, follow the steps below:
+
+1. If the app is currently open, exit the app before continuing.
+2. Open the **Finder** menu.
+3. Click the **Applications** option from the sidebar.
+4. Find the **Jan** app or type in the search bar.
+5. Use any of these ways to move the **Jan** app to the Trash:
+- Drag the app to the Trash.
+- Select the app and choose the Move to Trash option.
+- Select the app and press Command-Delete on your keyboard.
+6. Use the following command to delete Jan's user data and app cache:
+```bash
+# Remove all user data
+rm -rf ~/jan
+
+# Delete the application data
+rm -rf ~/Library/Application\ Support/Jan/data
+
+# Delete the application cache
+rm -rf ~/Library/Application\ Support/Jan/cache
+```
+
+{/* ## FAQs
+
+
+Nightly Releases allow you to test new features and previews of upcoming stable releases. You can download them from Jan's GitHub repository. However, remember that these builds might contain bugs and crash frequently.
+
+
+Yes, you can move the Jan data folder.
+
+
+Depending on your Mac type (Apple Silicon or Intel), you won't be able to utilize the GPU acceleration feature if you have a Mac with an Intel processor.
+
+
+No, it cannot be restored once you delete the Jan data folder during uninstallation.
+
+
+
+💡 Warning: If you have any trouble during installation, please see our [Troubleshooting](/docs/troubleshooting) guide to resolve your problem.
+
+
+*/}
diff --git a/docs/src/pages/docs/desktop/windows.mdx b/docs/src/pages/docs/desktop/windows.mdx
new file mode 100644
index 0000000000..30f0b9918f
--- /dev/null
+++ b/docs/src/pages/docs/desktop/windows.mdx
@@ -0,0 +1,216 @@
+---
+title: Windows
+description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ "desktop"
+ ]
+---
+
+import { Tabs, Callout, Steps } from 'nextra/components'
+import FAQBox from '@/components/FaqBox'
+
+
+# Windows Installation
+To install Jan desktop on Windows, follow the steps below:
+## Compatibility
+
+Ensure that your system meets the following requirements to use Jan effectively:
+
+
+- Windows 10 or higher.
+
+
+
+
+
+- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Haswell processors (Q2 2013) and newer.
+- Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors.
+
+
+
+- Jan supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- We support older processors with AVX and AVX-512, though this is not recommended.
+
+- Excavator processors (Q2 2015) and newer.
+
+
+
+
+- 8GB for running up to 3B models (int4).
+- 16GB for running up to 7B models (int4).
+- 32GB for running up to 13B models (int4).
+
+
+We support DDR2 RAM as the minimum requirement but recommend using newer generations of RAM for improved performance.
+
+
+
+
+- 6GB can load the 3B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
+- 8GB can load the 7B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
+- 12GB can load the 13B model (int4) with `ngl` at 120 ~ full speed on CPU/ GPU.
+
+
+Having at least 6GB VRAM when using NVIDIA, AMD, or Intel Arc GPUs is recommended.
+
+
+
+
+- At least 10GB for app storage and model download.
+
+
+## Installing Jan
+
+To install Jan, follow the steps below:
+
+### Step 1: Download the Jan Application
+
+Jan provides two types of releases:
+
+
+#### Stable Releases
+
+Stable releases are the official builds of Jan. You can download the latest stable release via the following:
+
+- **Official Website**: [https://jan.ai](https://jan.ai/)
+- **Jan GitHub repository**: [Github](https://github.com/janhq/jan/releases)
+
+
+Make sure to verify the URL to ensure that it's the official Jan website and GitHub repository.
+
+
+
+#### Nightly Releases
+
+The nightly release allows you to test out new features and get a sneak peek at what might be included in future stable releases. You can download this version via:
+
+- **Jan GitHub repository**: [Github](https://github.com/janhq/jan/actions/workflows/jan-electron-build-nightly.yml)
+
+
+Keep in mind that this build might crash frequently and may contain bugs!
+
+
+
+### Step 2: Install the Jan Application
+
+1. Once you have downloaded the Jan app `.exe` file, open the file.
+2. Wait for Jan to be completely installed on your machine.
+3. Once installed, you can access Jan on your machine.
+
+## Data Folder
+
+By default, Jan is installed in the following directory:
+
+```
+# Default installation directory
+~/AppData/Roaming/Jan/data
+
+```
+
+
+- Please see the [Jan Data Folder](/docs/data-folder) for more details about the data folder structure.
+
+
+## GPU Acceleration
+
+Once Jan is installed and you have a GPU, you can use your GPU to accelerate the model's performance.
+
+### NVIDIA GPU
+
+To enable the use of your NVIDIA GPU in the Jan app, follow the steps below:
+
+
+Ensure that you have installed the following to use NVIDIA GPU:
+- NVIDIA GPU with CUDA Toolkit 11.7 or higher.
+- NVIDIA driver 470.63.01 or higher.
+
+
+1. Open Jan application.
+2. Go to **Settings** -> **Advanced Settings** -> **GPU Acceleration**.
+3. Enable and choose the NVIDIA GPU you want.
+4. A success notification saying **Successfully turned on GPU acceleration** will appear when GPU acceleration is activated.
+
+
+While **Vulkan** can enable Nvidia GPU acceleration in the Jan app, **CUDA** is recommended for faster performance.
+
+
+### AMD GPU
+
+To enable the use of your AMD GPU in the Jan app, you need to activate the Vulkan support first by following the steps below:
+
+1. Open Jan application.
+2. Go to **Settings** -> **Advanced Settings** -> enable the **Experimental Mode**.
+3. Enable the **Vulkan Support** under the **GPU Acceleration**.
+4. Enable the **GPU Acceleration** and choose the AMD GPU you want to use.
+5. A success notification saying **Successfully turned on GPU acceleration** will appear when GPU acceleration is activated.
+
+### Intel Arc GPU
+
+To enable the use of your Intel Arc GPU in the Jan app, you need to activate the Vulkan support first by following the steps below:
+
+1. Open Jan application.
+2. Go to **Settings** -> **Advanced Settings** -> enable the **Experimental Mode**.
+3. Enable the **Vulkan Support** under the **GPU Acceleration**.
+4. Enable the **GPU Acceleration** and choose the Intel Arc GPU you want to use.
+5. A success notification saying **Successfully turned on GPU acceleration** will appear when GPU acceleration is activated.
+
+
+## Uninstalling Jan
+
+To uninstall Jan, follow the steps below:
+
+### Step 1: Open the Control Panel
+
+1. Open the **Control Panel**.
+2. Click **Uninstall a program** under the **Programs** section.
+
+### Step 2: Uninstall Jan App
+
+1. Search for **Jan**.
+2. Click the **three dots icon** -> **Uninstall**.
+3. Click **Uninstall** once again to confirm the action.
+4. Click **OK**.
+5. A message will appear: **"Do you also want to delete the DEFAULT Jan data folder at C:\Users\{username}\Jan?"**.
+6. Click **OK** to delete the entire Jan data folder, or click **Cancel** to keep your Jan data folder so it can be reused by a future installation.
+7. Navigate to `C:\Users\{username}\AppData\Roaming`.
+8. Delete the `Jan` folder that contains the app cache.
+
+
+The deleted Data Folder cannot be restored.
+
+
+{/* ## FAQs
+
+
+Nightly Releases allow you to test new features and previews of upcoming stable releases. You can download them from Jan's GitHub repository. However, remember that these builds might contain bugs and crash frequently.
+
+
+Yes, you can move the Jan data folder.
+
+
+Depending on your GPU type (NVIDIA, AMD, or Intel), follow the respective instructions provided in the [GPU Acceleration](https://www.notion.so/docs/desktop/windows#gpu-acceleration) section above.
+
+
+No, it cannot be restored once you delete the Jan data folder during uninstallation.
+
+
+
+If you have any trouble during installation, please see our [Troubleshooting](https://www.notion.so/docs/troubleshooting) guide to resolve your problem.
+ */}
diff --git a/docs/src/pages/docs/error-codes.mdx b/docs/src/pages/docs/error-codes.mdx
new file mode 100644
index 0000000000..65890c739b
--- /dev/null
+++ b/docs/src/pages/docs/error-codes.mdx
@@ -0,0 +1,101 @@
+---
+title: Error Codes
+description: Learn Jan application's error codes and how to solve them.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ troubleshooting,
+ error codes,
+ broken build,
+ something amiss,
+ unexpected token,
+ undefined issue,
+ permission denied,
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout } from 'nextra/components'
+
+# Error Codes
+This article summarizes error codes in the Jan application, categorized by application feature. Each error includes a unique code and a **Prefix** indicating its category.
+The following sections detail the error categories, their descriptions, and the error types.
+
+See the Solution column for error troubleshooting.
+
+## Error Code Categories
+
+### 1. Installation Errors
+These errors relate to issues encountered during installation, including desktop and server setups.
+- **Prefix**: INST
+
+### 2. Hardware Setup Errors
+These errors relate to configuring and setting up hardware components.
+- **Prefix**: HW
+#### Error Types
+| Error Code | Cause | Solution |
+|------------|-----------------------------------------|-------------------------------------------------------------------------------------------|
+| HW-1 | The CUDA toolkit may be unavailable. | [Troubleshooting Nvidia GPU](/docs/troubleshooting#1-ensure-gpu-mode-requirements) |
+| HW-2 | Problem with Nvidia drivers. | [Troubleshooting Nvidia GPU](/docs/troubleshooting#troubleshooting-nvidia-gpu) |
+
+
+### 3. Architecture Errors
+These errors relate to problems with the overall system architecture and configuration setups.
+- **Prefix**: ARCH
+
+### 4. Basic Usage Errors
+These errors occur during the basic usage of the application, including issues with models, assistants, and tools.
+- **Prefix**: USG
+#### Error Types
+| Error Code | Cause | Solution |
+|------------|-----------------------------------|--------------------------------------|
+| USG-1 | Model is currently unavailable. | [How to Use Model](/docs/models) |
+
+
+### 5. Advanced Settings Errors
+These errors relate to advanced settings and configurations within the application.
+- **Prefix**: ADV
+
+### 6. Inference Engine Errors
+These errors relate to inference engines, both local and remote.
+- **Prefix**: IE
+#### Error Types
+| Error Code | Cause | Solution |
+|------------|---------------------------------------------|---------------------------------------------------------------------------------------------------------------|
+| IE-2 | The OpenAI model's API key is invalid. | [How to Integrate OpenAI API](/docs/remote-inference/openai#how-to-integrate-openai-api-with-jan) |
+| IE-3 | The Groq model's API key is invalid. | [How to Integrate Groq API](/docs/remote-inference/groq#how-to-integrate-groq-api-with-jan) |
+| IE-4 | The Mistral model's API key is invalid. | [How to Integrate Mistral API](/docs/remote-inference/mistralai) |
+| IE-5 | The OpenRouter model's API key is invalid. | [How to Integrate OpenRouter API](/docs/remote-inference/openrouter) |
+
+### 7. Local API Server Errors
+These errors relate to the local API server's functionality.
+- **Prefix**: API
+#### Error Types
+| Error Code | Cause | Solution |
+|------------|----------------------------------------|--------------------------------------------------------------|
+| API-1 | Port 3928 is currently unavailable. | [Local API Server Guide](/docs/local-api#step-1-set-the-local-server) |
+
+
+### 8. Extensions and Integration Errors
+These errors relate to integrating the application with external systems or extensions.
+- **Prefix**: EXT
+
+### 9. Troubleshooting Errors
+These errors occur during the troubleshooting processes and procedures.
+- **Prefix**: TRO
+
+### 10. Unclear Errors
+These errors don't fall into the standard categories above, making their causes difficult to identify.
+#### Error Codes
+| Error Title | Solution |
+|-------------------|------------------|
+| Something’s Amiss | [Troubleshooting Something’s Amiss](/docs/troubleshooting#somethings-amiss) |
+| Undefined Issue | [Troubleshooting Undefined Issue](/docs/troubleshooting#undefined-issue) |
diff --git a/docs/src/pages/docs/extensions.mdx b/docs/src/pages/docs/extensions.mdx
new file mode 100644
index 0000000000..b660b365f9
--- /dev/null
+++ b/docs/src/pages/docs/extensions.mdx
@@ -0,0 +1,369 @@
+---
+title: Extensions Overview
+description: Learn about Jan's default extensions and explore how to configure them.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Jan Extensions,
+ Extensions,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+
+
+# Extensions Overview
+
+The current Jan Desktop Client has some default extensions built on its extension framework to enhance the user experience. There are two types of extensions:
+- Model Providers: extensions that enable Jan to support cloud-based models and provide the engines that support additional local models.
+- Core Extensions: built-in tools for managing and monitoring Jan's system.
+
+## List of Model Provider Extensions
+| Extension Name | Version | Description |
+|----------------|---------|-------------|
+| Anthropic | v1.0.2 | This extension enables Anthropic chat completion API calls |
+| Cohere | v1.0.0 | This extension enables Cohere chat completion API calls |
+| Groq | v1.0.1 | This extension enables fast Groq chat completion API calls |
+| Martian | v1.0.1 | This extension enables Martian chat completion API calls |
+| MistralAI | v1.0.1 | This extension enables Mistral chat completion API calls |
+| TensorRT-LLM | v0.0.3 | This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the setup guide for next steps |
+| NVIDIA NIM | v1.0.1 | This extension enables NVIDIA chat completion API calls |
+| OpenAI | v1.0.2 | This extension enables OpenAI chat completion API calls |
+| OpenRouter | v1.0.0 | This extension enables Open Router chat completion API calls |
+| Triton-TRT-LLM | v1.0.0 | This extension enables Nvidia's Triton-TRT-LLM as an inference engine option |
+
+## List of Core Extensions
+| Extension Name | Version | Description |
+|---------------------|----------|-----------------------------------------------------------------------------|
+| Jan Assistant | v1.0.1 | This extension enables assistants, including Jan, a default assistant that can call all downloaded models |
+| Conversational | v1.0.0 | This extension enables conversations and state persistence via your filesystem |
+| Model Management | v1.0.33 | Model Management Extension provides model exploration and seamless downloads |
+| System Monitoring | v1.0.10 | This extension provides system health and OS level data |
+
+
+## Configure Extension Settings
+
+To configure an extension's settings:
+
+1. Navigate to the `~/jan/data/extensions` folder.
+2. Open the `extensions.json` file.
+3. Edit the file; the available options include:
+
+| Option | Description |
+| ---------------- | ----------------------------------- |
+| `_active` | Enable/disable the extension. |
+| `listeners` | Default listener setting. |
+| `origin` | Extension file path. |
+| `installOptions` | Version and metadata configuration. |
+| `name` | Extension name. |
+| `productName` | Extension display name. |
+| `version` | Extension version. |
+| `main` | Main file path. |
+| `description` | Extension description. |
+| `url` | Extension URL. |
+
+```json title="~/jan/data/extensions/extensions.json"
+{
+ "@janhq/conversational-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-conversational-extension-1.0.0.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/conversational-extension",
+ "productName": "Conversational",
+ "version": "1.0.0",
+ "main": "dist/index.js",
+ "description": "This extension enables conversations and state persistence via your filesystem",
+ "url": "extension://@janhq/conversational-extension/dist/index.js"
+ },
+ "@janhq/inference-anthropic-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-anthropic-extension-1.0.2.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-anthropic-extension",
+ "productName": "Anthropic Inference Engine",
+ "version": "1.0.2",
+ "main": "dist/index.js",
+ "description": "This extension enables Anthropic chat completion API calls",
+ "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
+ },
+ "@janhq/inference-triton-trt-llm-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-triton-trt-llm-extension-1.0.0.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-triton-trt-llm-extension",
+ "productName": "Triton-TRT-LLM Inference Engine",
+ "version": "1.0.0",
+ "main": "dist/index.js",
+ "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option",
+ "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
+ },
+ "@janhq/inference-mistral-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-mistral-extension-1.0.1.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-mistral-extension",
+ "productName": "MistralAI Inference Engine",
+ "version": "1.0.1",
+ "main": "dist/index.js",
+ "description": "This extension enables Mistral chat completion API calls",
+ "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
+ },
+ "@janhq/inference-martian-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-martian-extension-1.0.1.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-martian-extension",
+ "productName": "Martian Inference Engine",
+ "version": "1.0.1",
+ "main": "dist/index.js",
+ "description": "This extension enables Martian chat completion API calls",
+ "url": "extension://@janhq/inference-martian-extension/dist/index.js"
+ },
+ "@janhq/inference-openrouter-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-openrouter-extension-1.0.0.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-openrouter-extension",
+ "productName": "OpenRouter Inference Engine",
+ "version": "1.0.0",
+ "main": "dist/index.js",
+ "description": "This extension enables Open Router chat completion API calls",
+ "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
+ },
+ "@janhq/inference-nvidia-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-nvidia-extension-1.0.1.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-nvidia-extension",
+ "productName": "NVIDIA NIM Inference Engine",
+ "version": "1.0.1",
+ "main": "dist/index.js",
+ "description": "This extension enables NVIDIA chat completion API calls",
+ "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
+ },
+ "@janhq/inference-groq-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-groq-extension-1.0.1.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-groq-extension",
+ "productName": "Groq Inference Engine",
+ "version": "1.0.1",
+ "main": "dist/index.js",
+ "description": "This extension enables fast Groq chat completion API calls",
+ "url": "extension://@janhq/inference-groq-extension/dist/index.js"
+ },
+ "@janhq/inference-openai-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-openai-extension-1.0.2.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-openai-extension",
+ "productName": "OpenAI Inference Engine",
+ "version": "1.0.2",
+ "main": "dist/index.js",
+ "description": "This extension enables OpenAI chat completion API calls",
+ "url": "extension://@janhq/inference-openai-extension/dist/index.js"
+ },
+ "@janhq/inference-cohere-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-cohere-extension-1.0.0.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-cohere-extension",
+ "productName": "Cohere Inference Engine",
+ "version": "1.0.0",
+ "main": "dist/index.js",
+ "description": "This extension enables Cohere chat completion API calls",
+ "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
+ },
+ "@janhq/model-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-model-extension-1.0.33.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/model-extension",
+ "productName": "Model Management",
+ "version": "1.0.33",
+ "main": "dist/index.js",
+ "description": "Model Management Extension provides model exploration and seamless downloads",
+ "url": "extension://@janhq/model-extension/dist/index.js"
+ },
+ "@janhq/monitoring-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-monitoring-extension-1.0.10.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/monitoring-extension",
+ "productName": "System Monitoring",
+ "version": "1.0.10",
+ "main": "dist/index.js",
+ "description": "This extension provides system health and OS level data",
+ "url": "extension://@janhq/monitoring-extension/dist/index.js"
+ },
+ "@janhq/assistant-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-assistant-extension-1.0.1.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/assistant-extension",
+ "productName": "Jan Assistant",
+ "version": "1.0.1",
+ "main": "dist/index.js",
+ "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models",
+ "url": "extension://@janhq/assistant-extension/dist/index.js"
+ },
+ "@janhq/tensorrt-llm-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-tensorrt-llm-extension-0.0.3.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/tensorrt-llm-extension",
+ "productName": "TensorRT-LLM Inference Engine",
+ "version": "0.0.3",
+ "main": "dist/index.js",
+ "description": "This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the [setup guide](https://jan.ai/guides/providers/tensorrt-llm/) for next steps.",
+ "url": "extension://@janhq/tensorrt-llm-extension/dist/index.js"
+ },
+ "@janhq/inference-cortex-extension": {
+ "_active": true,
+ "listeners": {},
+ "origin": "C:\\Users\\ACER\\AppData\\Local\\Programs\\jan\\resources\\app.asar.unpacked\\pre-install\\janhq-inference-cortex-extension-1.0.15.tgz",
+ "installOptions": {
+ "version": false,
+ "fullMetadata": true
+ },
+ "name": "@janhq/inference-cortex-extension",
+ "productName": "Cortex Inference Engine",
+ "version": "1.0.15",
+ "main": "dist/index.js",
+ "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
+ "url": "extension://@janhq/inference-cortex-extension/dist/index.js"
+ }
+}
+```
+
+## Specific Extension Settings
+Jan offers an Extensions settings menu for configuring extensions that have registered their settings within the application. Here, you can integrate remote inference engines with Jan without inserting the URL and API key directly into the `JSON` file, and you can turn the available logging extensions on or off. To access the Extensions settings, follow the steps below:
+1. Navigate to the main dashboard.
+2. Click the **gear icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+3. Click **Extensions**.
+
+![Extensions](./_assets/extensions-page2.png)
+
+## Turn Off an Extension
+
+To turn off the extension, follow the steps below:
+
+1. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+2. Click the **Extensions** button.
+
+![Extensions](./_assets/extensions-page2.png)
+
+3. Click the slider button to turn off the extension.
+
+![Extensions](./_assets/turn-off.png)
+
+4. Restart the app to see that the extension has been disabled.
+
+## Model Management
+The Model Management extension allows Jan to download specific models from Hugging Face repositories that require an access token.
+1. Navigate to the main dashboard.
+2. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+3. Under the **Core Extensions** section, select the **Model Management** extension.
+
+![Model Management extension](./_assets/model-management1.png)
+
+4. Enter your Hugging Face access token.
+
+![Model Management Enable](./_assets/model-management2.png)
+
+
+## System Monitor
+The System Monitor extension now offers enhanced customization for app logging. Users can toggle the application logging feature on or off and set a custom interval for clearing the app logs. To configure the app log feature, follow these steps:
+1. Navigate to the main dashboard.
+2. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+3. Under the **Core Extensions** section, select the **System Monitoring** extension.
+
+![System Monitoring extension](./_assets/system-monitor2.png)
+
+4. Use the **slider** to turn the app logging feature on or off.
+
+![System Monitoring Enable](./_assets/system-slider2.png)
+
+5. Specify the log cleaning interval in milliseconds.
+
+![System Monitoring Interval](./_assets/system-mili2.png)
+
+
+ - You can clear the app logs manually by clicking the **Clear logs** button in the advanced settings.
+ - There is no minimum or maximum interval for this setting; however, invalid inputs will default to `120000` ms (2 minutes).
+
\ No newline at end of file
diff --git a/docs/src/pages/docs/index.mdx b/docs/src/pages/docs/index.mdx
new file mode 100644
index 0000000000..027ad11b63
--- /dev/null
+++ b/docs/src/pages/docs/index.mdx
@@ -0,0 +1,154 @@
+---
+title: Jan
+description: Jan is an open-source, self-hosted alternative to OpenAI's platform - build and run AI on your own desktop or server.
+keywords:
+ [
+ Jan,
+ Jan AI,
+ ChatGPT alternative,
+ OpenAI platform alternative,
+ local API,
+ local AI,
+ private AI,
+ conversational AI,
+ no-subscription fee,
+ large language model,
+ LLM,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+import FAQBox from '@/components/FaqBox'
+
+# Jan
+
+![Jan's Cover Image](./_assets/jan-display.png)
+
+
+Jan is a ChatGPT-alternative that runs 100% offline on your [Desktop](/docs/desktop-installation). Our goal is to make it easy for a layperson[^1] to download and run LLMs and use AI with full control and [privacy](https://www.reuters.com/legal/legalindustry/privacy-paradox-with-ai-2023-10-31/).
+
+Jan is powered by [Cortex](https://cortex.so/), our embeddable local AI engine.
+
+
+**OpenAI-equivalent API:** Jan runs a Cortex Server in the background, which provides an OpenAI-equivalent API at https://localhost:1337.
+
+You'll be able to use it with [Continue.dev](https://jan.ai/integrations/coding/vscode), [Open Interpreter](https://jan.ai/integrations/function-calling/interpreter), or any OpenAI-compatible app.
+
+
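+For a rough illustration, a request to that local endpoint uses the standard OpenAI chat-completions shape. The sketch below assumes you have already downloaded a model such as `mistral-ins-7b-q4`; substitute the id of any model you have installed, and POST the body to `https://localhost:1337/v1/chat/completions`.
+
+```json
+{
+  "model": "mistral-ins-7b-q4",
+  "messages": [
+    { "role": "user", "content": "Hello from Jan!" }
+  ],
+  "stream": false
+}
+```
+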
+### Features
+
+- [Model Library](https://jan.ai/docs/models/manage-models#add-models) with popular LLMs like Llama3, Gemma or Mistral
+- Connect to [Remote AI APIs](https://jan.ai/docs/remote-inference/openai) like Groq and OpenRouter
+- [Local API Server](https://jan.ai/api-reference) with OpenAI-equivalent API
+- [Extensions](https://jan.ai/docs/extensions) for customizing Jan
+
+### Philosophy
+
+Jan is built to be [User-owned](about#-user-owned):
+- Open source via the [AGPLv3 license](https://github.com/janhq/jan/blob/dev/LICENSE)
+- [Local-first](https://www.inkandswitch.com/local-first/), with all data stored locally
+- Runs 100% offline, with privacy by default
+- We do not [collect or sell user data](/privacy)
+
+
+ You can read more about our [philosophy](/about#philosophy) here.
+
+
+### Inspirations
+
+Jan is inspired by the concepts of [Calm Computing](https://en.wikipedia.org/wiki/Calm_technology), and the Disappearing Computer.
+
+## Acknowledgements
+
+Jan is built on the shoulders of many upstream open-source projects:
+
+- [Llama.cpp](https://github.com/ggerganov/llama.cpp/blob/master/LICENSE)
+- [LangChain.js](https://github.com/langchain-ai/langchainjs/blob/main/LICENSE)
+- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/blob/main/LICENSE)
+- [TheBloke/GGUF](https://huggingface.co/TheBloke)
+- [Scalar](https://github.com/scalar/scalar)
+
+## FAQs
+
+
+ Jan runs and trains models (LLMs) on your laptop or desktop computers.
+
+
+
+ Download Jan Desktop on your computer, download a compatible LLM or connect to a remote AI with your API key, and start chatting. You can switch between models as needed.
+
+
+
+ Jan is available for Mac, Windows, and Linux, as well as via Docker/Helm, ensuring wide compatibility.
+
+ GPU-wise, Jan supports Nvidia, AMD (through Vulkan), and Intel.
+
+
+
+ No. Not even a little. Your usage data is entirely local and private and never leaves your computer.
+
+ We also don't track IP or other identifying information.
+
+
+If you use Jan in remote/API mode, e.g., chatting with ChatGPT, the remote provider may still collect your info.
+
+
+
+
+ No, and we never will.
+
+
+
+ Jan prioritizes your privacy by running open-source AI models 100% offline on your computer. Conversations, documents, and files stay private. You can find your own user data at `~/jan` on your local filesystem.
+
+
+
+ Jan stands for "Just a Name". We are, admittedly, bad at marketing 😂.
+
+
+
+ Yes, Jan defaults to running locally without an internet connection.
+
+
+
+ Jan is free to use. However, if you want to connect to remote APIs, like
+ GPT-4, you will need to put in your own API key.
+
+
+
+ You can download popular AI models through Jan's Hub or import any model you choose directly from HuggingFace.
+
+
+
+ Jan is built like VSCode and Obsidian. It supports 3rd-party extensions; in fact, most of the UI-level features are built as extensions in a few lines of code.
+
+
+
+ Contributions can be made through [GitHub](https://github.com/janhq/jan) and [Discord](https://discord.gg/Exe46xPMbK), where you can also suggest features and make pull requests. No need to ask for permission. We're fully open-source!
+
+
+
+ Joining [Jan's Discord server](https://discord.gg/qSwXFx6Krr) is a great way
+ to get involved with the community.
+
+
+
+ For troubleshooting, you should reach out on Discord and check GitHub for
+ assistance and support from the community and the development team.
+
+
+
+ Yes! We love the self-hosted movement. Jan is available as a Helm chart /
+ Docker Compose setup that can be run on home servers or even production-level
+ environments.
+
+
+
+ We often hire directly from our community. If you want to apply,
+ please see our careers page [here](https://homebrew.bamboohr.com/careers).
+
+
+
+## Footnotes
+
+[^1]: Our definition of "non-technical": you don't need to know how to use the command line.
\ No newline at end of file
diff --git a/docs/src/pages/docs/installing-extension.mdx b/docs/src/pages/docs/installing-extension.mdx
new file mode 100644
index 0000000000..6471e9135b
--- /dev/null
+++ b/docs/src/pages/docs/installing-extension.mdx
@@ -0,0 +1,46 @@
+---
+title: Installing an Extension
+description: A step-by-step guide on installing an extension.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Jan Extensions,
+ Extensions,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+
+
+## Installing an Extension
+
+Jan automatically installs 11 default extensions when you set up the application. You can also add a custom third-party extension at your own risk.
+
+Here are the steps to install a custom extension:
+
+Jan only accepts the `.tgz` file format for installing a custom extension.
+
+
+1. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+2. Click the **Extensions** button.
+
+![Extensions](./_assets/extensions-page2.png)
+
+3. Select **Install Extension** in the top-right corner.
+
+![Install Extension](./_assets/install-ext.png)
+
+4. Select a `.tgz` extension file.
+5. Restart the Jan application.
+6. The `~/jan/data/extensions/extensions.json` file will then be updated automatically.
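+
+For reference, a newly installed extension receives an entry shaped like the examples in the [Extensions Overview](/docs/extensions). The package name, paths, and version below are placeholders for your own extension:
+
+```json
+"@your-org/custom-extension": {
+  "_active": true,
+  "listeners": {},
+  "origin": "/path/to/your-org-custom-extension-1.0.0.tgz",
+  "installOptions": { "version": false, "fullMetadata": true },
+  "name": "@your-org/custom-extension",
+  "productName": "Custom Extension",
+  "version": "1.0.0",
+  "main": "dist/index.js",
+  "description": "A third-party extension installed from a .tgz file",
+  "url": "extension://@your-org/custom-extension/dist/index.js"
+}
+```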
\ No newline at end of file
diff --git a/docs/src/pages/docs/models.mdx b/docs/src/pages/docs/models.mdx
new file mode 100644
index 0000000000..76aa6c991b
--- /dev/null
+++ b/docs/src/pages/docs/models.mdx
@@ -0,0 +1,35 @@
+---
+title: Models
+description: Jan is a ChatGPT-alternative that runs on your computer, with a local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Hardware Setup,
+ GPU,
+ ]
+---
+
+import { Cards, Card } from 'nextra/components'
+import childPages from './models/_meta.json';
+
+# Models
+
+
+
+<Cards
+  children={Object.entries(childPages).map(([key, page]) => (
+    <Card key={key} title={page.title} href={page.href} />
+  ))}
+/>
\ No newline at end of file
diff --git a/docs/src/pages/docs/models/_meta.json b/docs/src/pages/docs/models/_meta.json
new file mode 100644
index 0000000000..bdebbe23fc
--- /dev/null
+++ b/docs/src/pages/docs/models/_meta.json
@@ -0,0 +1,10 @@
+{
+ "manage-models": {
+ "title": "Managing Models",
+ "href": "/docs/models/manage-models"
+ },
+ "model-parameters": {
+ "title": "Model Parameters",
+ "href": "/docs/models/model-parameters"
+ }
+}
diff --git a/docs/src/pages/docs/models/manage-models.mdx b/docs/src/pages/docs/models/manage-models.mdx
new file mode 100644
index 0000000000..d9903c6dac
--- /dev/null
+++ b/docs/src/pages/docs/models/manage-models.mdx
@@ -0,0 +1,199 @@
+---
+title: Managing Models
+description: Manage your interaction with AI locally.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ threads,
+ chat history,
+ thread history,
+ ]
+---
+import { Callout, Steps } from 'nextra/components'
+
+# Overview
+This guide provides comprehensive instructions on adding, customizing, and deleting models within the Jan platform.
+
+## Add Models
+
+There are various ways to add models to Jan.
+
+Currently, Jan natively supports the following model formats:
+- GGUF (through a llama.cpp engine)
+- TensorRT (through a TRT-LLM engine)
+
+### Download from Jan Hub
+Jan Hub provides three convenient methods to access machine learning models. Here’s a clear step-by-step guide for each method:
+
+#### 1. Download from the Recommended List
+The Recommended List is a great starting point if you're looking for popular and pre-configured models that work well and quickly on most computers.
+
+1. Open the Jan app and navigate to the Hub.
+
+![Jan Hub](../_assets/hub.png)
+
+2. Browse the models, clicking the `v` dropdown next to a model for more information.
+
+Models with the `Recommended` label will likely run faster on your computer.
+
+3. Click **Download** to download the model.
+
+![Download Model](../_assets/download-button.png)
+
+#### 2. Download with HuggingFace Model's ID or URL
+If you need a specific model from [Hugging Face](https://huggingface.co/models), Jan Hub lets you download it directly using the model’s ID or URL.
+
+Only `GGUF` models are supported for this feature.
+
+1. Go to [Hugging Face](https://huggingface.co/models).
+2. Select the model you want to use.
+3. Copy the Model's ID or URL, for example: `MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUF` or `https://huggingface.co/MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUF`.
+4. Return to the Jan app and click on the Hub tab.
+
+![Jan Hub](../_assets/hub.png)
+
+5. Paste the **URL** or the **model ID** you have copied into the search bar.
+
+![Search Bar](../_assets/search-bar.png)
+
+6. The app will show all available versions of the model.
+7. Click **Download** to download the model.
+
+![Download Model](../_assets/download-button2.png)
+
+#### 3. Download with Deep Link
+You can also use Jan's deep link feature to download a specific model from [Hugging Face](https://huggingface.co/models). The deep link format is: `jan://models/huggingface/{model_id}`.
+
+The deep link feature cannot be used for models that require:
+- API Token.
+- Acceptance of usage agreement.
+
+You will need to download such models manually.
+
+1. Go to [Hugging Face](https://huggingface.co/models).
+2. Select the model you want to use.
+3. Copy the Model's ID or URL, for example: `TheBloke/Magicoder-S-DS-6.7B-GGUF`.
+4. Enter the deep link URL with your chosen model's ID in your browser. For example: `jan://models/huggingface/TheBloke/Magicoder-S-DS-6.7B-GGUF`
+
+![Paste the URL](../_assets/browser1.png)
+
+5. A prompt will appear, click **Open** to open the Jan app.
+
+![Click Open](../_assets/browser2.png)
+
+6. The app will show all available versions of the model.
+7. Click **Download** to download the model.
+
+![Download Model](../_assets/download-button3.png)
+
+### Import or Symlink Local Models
+
+You can also point to existing model binary files on your local filesystem.
+This is the easiest and most space-efficient way if you have already used other local AI applications.
+
+1. Navigate to **Settings**.
+
+![Jan Hub](../_assets/hub.png)
+
+2. Click on `My Models` at the top.
+
+![Import Model](../_assets/import.png)
+
+3. Click the `Import Model` button on the top-right of your screen.
+4. Click the upload icon button.
+
+![Download Icon](../_assets/download-icon.png)
+
+5. Import using a `.GGUF` file or a folder.
+
+![Import Model](../_assets/import2.png)
+
+6. Select the model or the folder containing multiple models.
+
+### Add a Model Manually
+You can also add a specific model that is not available within the **Hub** section by following the steps below:
+1. Open the Jan app.
+2. Click the **gear icon (⚙️)** on the bottom left of your screen.
+
+![Settings](../_assets/settings.png)
+
+3. On the **Settings** screen, click **Advanced Settings**.
+
+![Settings](../_assets/advance-set.png)
+
+4. Open the **Jan Data folder**.
+
+![Jan Data Folder](../_assets/data-folder.png)
+
+5. Head to `~/jan/data/models/`.
+6. Make a new model folder and put a file named `model.json` in it.
+7. Insert the following default `model.json` template:
+```json
+{
+ "id": "",
+ "object": "",
+ "name": "",
+ "version": "",
+ "description": "",
+ "format": "",
+ "settings": "",
+ "parameters": {
+ "max_tokens": "",
+ "temperature": ""
+ },
+ "metadata": {
+ "author": "",
+ "tags": [""]
+ },
+ "engine": "",
+ "source": ""
+}
+```
+There are two important fields in `model.json` that you need to set:
+
+#### Settings
+
+This is the field where you can set your engine configurations.
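+
+For example, for a `GGUF` model running on the llama.cpp engine, the `settings` object typically carries values like the following (a sketch; the exact fields depend on the engine and model):
+
+```json
+"settings": {
+  "ctx_len": 4096,
+  "ngl": 32,
+  "prompt_template": "[INST] {prompt} [/INST]"
+}
+```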
+
+#### Parameters
+
+`parameters` are the adjustable settings that affect how your model operates or processes the data.
+The fields in `parameters` are typically general and can be the same across models. Here is an example of model parameters:
+
+```json
+"parameters":{
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+}
+```
+
+
+To see the complete list of a model's parameters, please see [Model Parameters](/docs/models/model-parameters).
+
+
+## Delete Models
+To delete a model:
+
+1. Go to **Settings**.
+
+![Settings](../_assets/settings.png)
+
+2. Go to **My Models**.
+
+![My Models](../_assets/mymodels.png)
+
+3. Select the three dots next and select **Delete model**.
+
+![Delete Model](../_assets/delete.png)
\ No newline at end of file
diff --git a/docs/src/pages/docs/models/model-parameters.mdx b/docs/src/pages/docs/models/model-parameters.mdx
new file mode 100644
index 0000000000..855cc990ea
--- /dev/null
+++ b/docs/src/pages/docs/models/model-parameters.mdx
@@ -0,0 +1,69 @@
+---
+title: Model Parameters
+description: Manage your interaction with AI locally.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ threads,
+ chat history,
+ thread history,
+ ]
+---
+import { Callout, Steps } from 'nextra/components'
+
+## Model Parameters
+A model has three main groups of parameters to configure:
+- Inference Parameters
+- Model Parameters
+- Engine Parameters
+
+### Inference Parameters
+Inference parameters are settings that control how an AI model generates outputs. These parameters include the following:
+| Parameter | Description |
+|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| **Temperature** | - Influences the randomness of the model's output. - A higher temperature leads to more random and diverse responses, while a lower temperature produces more predictable outputs. |
+| **Top P** | - Sets a probability threshold, allowing only the most likely tokens whose cumulative probability exceeds the threshold to be considered for generation. - A lower top-P value (e.g., 0.9) may be more suitable for focused, task-oriented applications, while a higher top-P value (e.g., 0.95 or 0.97) may be better for more open-ended, creative tasks. |
+| **Stream** | - Enables real-time data processing, which is useful for applications needing immediate responses, like live interactions. It accelerates predictions by processing data as it becomes available. - Turned on by default. |
+| **Max Tokens** | - Sets the upper limit on the number of tokens the model can generate in a single output. - A higher limit benefits detailed and complex responses, while a lower limit helps maintain conciseness.|
+| **Stop Sequences** | - Defines specific tokens or phrases that signal the model to stop producing further output. - Use common concluding phrases or tokens specific to your application’s domain to ensure outputs terminate appropriately. |
+| **Frequency Penalty** | - Modifies the likelihood of the model repeating the same words or phrases within a single output, reducing redundancy in the generated text. - Increase the penalty to avoid repetition in scenarios where varied language is preferred, such as creative writing or content generation.|
+| **Presence Penalty** | - Encourages the generation of new and varied concepts by penalizing tokens that have already appeared, promoting diversity and novelty in the output. - Use a higher penalty for tasks requiring high novelty and variety, such as brainstorming or ideation sessions.|
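+
+As a point of reference, these inference parameters map onto fields like the following in a model's `parameters` block (a sketch with illustrative values):
+
+```json
+"parameters": {
+  "temperature": 0.7,
+  "top_p": 0.95,
+  "stream": true,
+  "max_tokens": 2048,
+  "stop": ["</s>"],
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+```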
+
+### Model Parameters
+Model parameters are the settings that define and configure the model's behavior. These parameters include the following:
+| Parameter | Description |
+|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| **Prompt Template** | - This predefined text or framework generates responses or predictions. It is a structured guide that the AI model fills in or expands upon during the generation process. - For example, a prompt template might include placeholders or specific instructions that direct how the model should formulate its outputs. |
+
+### Engine Parameters
+Engine parameters are the settings that define how the model processes input data and generates output. These parameters include the following:
+| Parameter | Description |
+|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| **Number of GPU Layers (ngl)** | - This parameter specifies the number of transformer layers in the model that are offloaded to the GPU for accelerated computation. Utilizing the GPU for these layers can significantly reduce inference time due to the parallel processing capabilities of GPUs. - Adjusting this parameter can help balance between computational load on the GPU and CPU, potentially improving performance for different deployment scenarios. |
+| **Context Length** | - This parameter determines the maximum amount of input context the model can use to generate responses. The maximum context length varies with the model used. This setting is crucial for the model’s ability to produce coherent and contextually appropriate outputs. - For tasks like summarizing long documents that require extensive context, use a higher context length. A lower setting can quicken response times and lessen computational demand for simpler queries or brief interactions. |
+
+
+By default, Jan sets the **Context Length** to the maximum supported by your model, which may slow down response times. For lower-spec devices, reduce **Context Length** to **1024** or **2048**, depending on your device's specifications, to improve speed.
+
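+For example, reducing the context length for a lower-spec device would look roughly like this in a model's settings (a sketch; the `ctx_len` field name follows the llama.cpp engine convention and may differ for other engines):
+
+```json
+"settings": {
+  "ctx_len": 2048
+}
+```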
+
+## Customize the Model Settings
+Adjust model settings for a specific conversation:
+
+1. Navigate to a **thread**.
+2. Click the **Model** tab.
+
+![Specific Conversation](../_assets/model-tab.png)
+3. You can customize the following parameters:
+ - Inference parameters
+ - Model parameters
+ - Engine parameters
+
+![Specific Conversation](../_assets/model-parameters.png)
diff --git a/docs/src/pages/docs/quickstart.mdx b/docs/src/pages/docs/quickstart.mdx
new file mode 100644
index 0000000000..192b63efcb
--- /dev/null
+++ b/docs/src/pages/docs/quickstart.mdx
@@ -0,0 +1,120 @@
+---
+title: Desktop installation
+description: Get started quickly with Jan, a ChatGPT-alternative that runs on your own computer, with a local API server. Learn how to install Jan and select an AI model to start chatting.
+sidebar_position: 2
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ quickstart,
+ getting started,
+ using AI model,
+ installation,
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout, Steps } from 'nextra/components'
+
+
+# Quickstart
+
+
+### Step 1: Install Jan
+You can run Jan either on your desktop using the Jan desktop app or on a server by installing the Jan server. To get started, check out the [Desktop](/docs/desktop) installation pages.
+
+Once you have installed Jan, you should see the Jan application as shown below without any local model installed:
+
+
+![Default State](./_assets/default.gif)
+
+
+
+### Step 2: Turn on the GPU Acceleration (Optional)
+If you have a graphics card, boost model performance by enabling GPU acceleration:
+1. Open the Jan application.
+2. Go to **Settings** -> **Advanced Settings** -> **GPU Acceleration**.
+3. Click the slider and choose your preferred GPU.
+4. A success notification saying **Successfully turned on GPU acceleration** will appear when GPU acceleration is activated.
+
+Ensure you have installed your GPU driver. Please see [Desktop](/docs/desktop) for more information on activating the GPU acceleration.
+
+
+
+![Turn on GPU acceleration](./_assets/gpu2.gif)
+
+### Step 3: Download a Model
+
+Jan offers various local AI models tailored to different needs, all ready for download directly to your device:
+
+1. Go to the **Hub**.
+2. Select the models that you would like to install. To see model details, click the model name.
+3. You can also paste the Hugging Face model's **ID** or **URL** in the search bar.
+
+Ensure you select the appropriate model size by balancing performance, cost, and resource considerations in line with your task's specific requirements and hardware specifications.
+
+4. Click the **Download** button.
+
+
+![Download a Model](./_assets/download-model2.gif)
+
+
+
+5. Go to the **Thread** tab.
+6. Click the **Model** tab button.
+7. Choose either the **On-device** or **Cloud** section.
+8. Adjust the configurations as needed.
+
+ Please see [Model Parameters](/docs/models/model-parameters) for detailed model configuration.
+
+
+
+
+![Parameters](./_assets/inf.gif)
+
+
+### Step 4: Customize the Assistant Instruction
+Customize Jan's assistant behavior by specifying queries, commands, or requests in the Assistant Instructions field to get the most relevant responses from your assistant. To customize, follow the steps below:
+1. On the **Thread** section, navigate to the right panel.
+2. Select the **Assistant** tab menu.
+3. Provide a specific guideline under the **Instructions** field.
+
+
+![Assistant Instruction](./_assets/asst.gif)
+
+
+### Step 5: Start Thread
+
+Once you have downloaded a model and customized your assistant instruction, you can start chatting with the model.
+
+
+
+![Chat with a Model](./_assets/chat.gif)
+
+
+
+### Step 6: Connect to a Remote API
+Jan also offers access to remote models hosted on external servers. You can connect to any remote AI API that is compatible with OpenAI. Jan comes with numerous extensions that facilitate connections to various remote AI APIs. To explore and connect to remote APIs, follow these steps:
+1. On the **Thread** section, navigate to the right panel.
+2. Select the **Model** tab menu.
+3. Next to the **OpenAI** models -> click the **Gear Icon (⚙️)**.
+4. Enter your OpenAI API **Keys**.
+
+
+
+![Connect Remote API](./_assets/server-openai2.gif)
+
+
+
+
+## What's Next?
+Now that Jan is up and running, explore further:
+1. Learn how to download and manage your [models](/docs/models).
+2. Customize Jan's [application settings](/docs/settings) according to your preferences.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/_meta.json b/docs/src/pages/docs/remote-models/_meta.json
new file mode 100644
index 0000000000..97ad17bad4
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/_meta.json
@@ -0,0 +1,38 @@
+{
+ "openai": {
+ "title": "OpenAI API",
+ "href": "/docs/remote-models/openai"
+ },
+ "azure": {
+ "title": "Azure OpenAI API",
+ "href": "/docs/remote-models/azure",
+ "display": "hidden"
+ },
+ "groq": { "title": "Groq API", "href": "/docs/remote-models/groq" },
+ "mistralai": {
+ "title": "Mistral AI API",
+ "href": "/docs/remote-models/mistralai"
+ },
+ "openrouter": { "title": "OpenRouter", "href": "/docs/remote-models/openrouter" },
+ "generic-openai": { "title": "Any OpenAI Compatible API", "href": "/docs/remote-models/generic-openai", "display": "hidden"},
+ "martian": {
+ "title": "Martian API",
+ "href": "/docs/remote-models/martian"
+ },
+ "cohere": {
+ "title": "Cohere API",
+ "href": "/docs/remote-models/cohere"
+ },
+ "anthropic": {
+ "title": "Anthropic API",
+ "href": "/docs/remote-models/anthropic"
+ },
+ "nvidia-nim": {
+ "title": "NVIDIA NIM API",
+ "href": "/docs/remote-models/nvidia-nim"
+ },
+ "triton": {
+ "title": "Triton-TRT-LLM",
+ "href": "/docs/remote-models/triton"
+ }
+}
diff --git a/docs/src/pages/docs/remote-models/anthropic.mdx b/docs/src/pages/docs/remote-models/anthropic.mdx
new file mode 100644
index 0000000000..33259bb3b3
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/anthropic.mdx
@@ -0,0 +1,61 @@
+---
+title: Anthropic
+description: Learn how to integrate Anthropic with Jan for enhanced functionality.
+keywords:
+ [
+ Anthropic API,
+ Jan,
+ Jan AI,
+ ChatGPT alternative,
+ conversational AI,
+ large language model,
+ integration,
+ Anthropic integration,
+ API integration
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Anthropic
+
+## How to Integrate Anthropic with Jan
+
+This guide provides step-by-step instructions on integrating Anthropic with Jan, enabling users to chat with Claude's LLMs within Jan's conversational interface.
+
+Before proceeding, ensure you have the following:
+- Access to the Jan application
+- Anthropic API credentials
+
+## Integration Steps
+
+
+
+### Step 1: Configure Anthropic API Key
+1. Obtain Anthropic API Keys from your [Anthropic Console](https://console.anthropic.com/).
+2. Copy your **Anthropic API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **Anthropic**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **Anthropic**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **Anthropic** section under Model Providers.
+4. Insert your **Anthropic API Key**.
+
+### Step 2: Start Chatting with the Model
+
+1. Select the Anthropic model you want to use.
+
+Anthropic is one of Jan's default extensions; all Anthropic models are set up automatically when you install the application.
+
+2. Specify the model's parameters.
+3. Start the conversation with the Anthropic model.
+
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using Anthropic with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Anthropic API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/azure.mdx b/docs/src/pages/docs/remote-models/azure.mdx
new file mode 100644
index 0000000000..af49fe89b6
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/azure.mdx
@@ -0,0 +1,58 @@
+---
+title: Azure OpenAI
+description: A step-by-step guide on integrating Jan with Azure OpenAI.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ integration,
+ Azure OpenAI Service,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+
+
+# Azure OpenAI API
+
+## How to Integrate Azure OpenAI API with Jan
+This guide provides step-by-step instructions for integrating the Azure OpenAI API with Jan, allowing users to utilize Azure's capabilities within Jan's conversational interface.
+
+## Integration Steps
+
+### Step 1: Configure OpenAI API Key
+1. Obtain OpenAI API Key from your [OpenAI Platform](https://platform.openai.com/api-keys) dashboard.
+2. Copy your **OpenAI API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **OpenAI**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **OpenAI**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **OpenAI** section under Model Providers.
+4. Insert your **OpenAI API Key**.
+
+
+The **OpenAI** fields can be used for any OpenAI-compatible API.
+
+
+### Step 2: Start Chatting with the Model
+
+1. Select the OpenAI model you want to use.
+2. Specify the model's parameters.
+3. Start the conversation with the OpenAI model.
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using OpenAI with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Azure OpenAI API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/cohere.mdx b/docs/src/pages/docs/remote-models/cohere.mdx
new file mode 100644
index 0000000000..8f774a0de7
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/cohere.mdx
@@ -0,0 +1,61 @@
+---
+title: Cohere
+description: Learn how to integrate Cohere with Jan for enhanced functionality.
+keywords:
+ [
+ Cohere API,
+ Jan,
+ Jan AI,
+ ChatGPT alternative,
+ conversational AI,
+ large language model,
+ integration,
+ Cohere integration,
+ API integration
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Cohere
+
+## How to Integrate Cohere with Jan
+
+This guide provides step-by-step instructions on integrating Cohere with Jan, enabling users to chat with Cohere's LLMs within Jan's conversational interface.
+
+Before proceeding, ensure you have the following:
+- Access to the Jan application
+- Cohere API credentials
+
+## Integration Steps
+
+
+
+### Step 1: Configure Cohere API Key
+1. Obtain Cohere API Keys from your [Cohere Dashboard](https://dashboard.cohere.com/).
+2. Copy your **Cohere API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **Cohere**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **Cohere**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **Cohere** section under Model Providers.
+4. Insert your **Cohere API Key**.
+
+
+### Step 2: Start Chatting with the Model
+
+1. Select the Cohere model you want to use.
+
+Cohere is one of Jan's default extensions; all Cohere models are set up automatically when you install the application.
+
+2. Specify the model's parameters.
+3. Start the conversation with the Cohere model.
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using Cohere with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Cohere API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/generic-openai.mdx b/docs/src/pages/docs/remote-models/generic-openai.mdx
new file mode 100644
index 0000000000..22f4833729
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/generic-openai.mdx
@@ -0,0 +1,69 @@
+---
+title: Any OpenAI Compatible API
+description: A step-by-step guide on how to set up Jan to connect with any remote or local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ import-models-manually,
+ remote server,
+ OAI compatible,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Any OpenAI-compatible API
+This guide outlines the process for configuring Jan as a client for both remote and local API servers, using the `mistral-ins-7b-q4` model for illustration. We'll show how to connect to Jan's API-hosting servers.
+
+
+ Currently, you can only connect to one OpenAI-compatible endpoint at a time.
+
+
+
+### Step 1: Configure a Client Connection
+
+1. Navigate to the **Jan app** > **Settings**.
+2. Select **OpenAI**.
+
+
+The **OpenAI** fields can be used for any OpenAI-compatible API.
+
+
+3. Insert the **API Key** and the **endpoint URL** into their respective fields. For example, if you want to communicate with Jan's own API server, you can configure it as follows:
+```json
+"full_url": "https://localhost:1337/v1/chat/completions"
+```
+
+ Please note that currently, the code that supports any OpenAI-compatible endpoint only reads the `~/jan/data/extensions/@janhq/inference-openai-extension/settings.json` file, which corresponds to the OpenAI Inference Engine entry on the Extensions page. It will not read any other files in this directory.
+
+
+![Server Setup](../_assets/OpenAi-1.gif)
+
+### Step 2: Start Chatting with the Model
+
+1. Navigate to the **Hub** section.
+2. Select the model you want to use.
+3. Specify the model's parameters.
+4. Start the conversation with the model.
+
+![Start Model](../_assets/OpenAi-2.gif)
+
+
+ If you have questions or want more preconfigured GGUF models, please join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions.
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using OpenAI with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to their API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/groq.mdx b/docs/src/pages/docs/remote-models/groq.mdx
new file mode 100644
index 0000000000..32c192fc50
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/groq.mdx
@@ -0,0 +1,62 @@
+---
+title: Groq API
+description: Learn how to integrate Groq API with Jan for enhanced functionality.
+keywords:
+ [
+ Groq API,
+ Jan,
+ Jan AI,
+ ChatGPT alternative,
+ conversational AI,
+ large language model,
+ integration,
+ Groq integration,
+ API integration
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Groq API
+
+## How to Integrate Groq API with Jan
+
+This guide provides step-by-step instructions on integrating the Groq API with Jan, enabling users to leverage Groq's capabilities within Jan's conversational interface.
+
+Before proceeding, ensure you have the following:
+- Access to the Jan application
+- Groq API credentials
+
+## Integration Steps
+
+
+
+### Step 1: Configure Groq API Key
+1. Obtain Groq API Keys from your [Groq Console](https://console.groq.com/keys) dashboard.
+2. Copy your **Groq API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **Groq**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **Groq**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **Groq** section under Model Providers.
+4. Insert your **Groq API Key**.
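+
+If you want to confirm the key works before configuring Jan, you can query Groq's OpenAI-compatible API directly. This is an optional sketch, assuming the standard base URL `https://api.groq.com/openai/v1` and that your key is exported as the environment variable `GROQ_API_KEY`:
+
+```bash
+# List the models your Groq API key can access.
+curl https://api.groq.com/openai/v1/models \
+  -H "Authorization: Bearer $GROQ_API_KEY"
+```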
+
+### Step 2: Start Chatting with the Model
+
+1. Select the Groq model you want to use.
+
+
+ The Groq Inference Engine is a default extension in the Jan application. All Groq models are automatically installed when you install the Jan application.
+
+2. Specify the model's parameters.
+3. Start the conversation with the Groq model.
+
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using Groq with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Groq API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/martian.mdx b/docs/src/pages/docs/remote-models/martian.mdx
new file mode 100644
index 0000000000..03278709a2
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/martian.mdx
@@ -0,0 +1,61 @@
+---
+title: Martian
+description: Learn how to integrate Martian with Jan for enhanced functionality.
+keywords:
+ [
+ Martian API,
+ Jan,
+ Jan AI,
+ ChatGPT alternative,
+ conversational AI,
+ large language model,
+ integration,
+ Martian integration,
+ API integration
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Martian
+
+## How to Integrate Martian with Jan
+
+This guide provides step-by-step instructions on integrating Martian with Jan, enabling users to leverage Martian's capabilities within Jan's conversational interface.
+
+Before proceeding, ensure you have the following:
+- Access to the Jan application
+- Martian API credentials
+
+## Integration Steps
+
+
+
+### Step 1: Configure Martian API Key
+1. Obtain Martian API Keys from your [Martian Dashboard](https://auth.withmartian.com/).
+2. Copy your **Martian API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **Martian**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **Martian**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **Martian** section under Model Providers.
+4. Insert your **Martian API Key**.
+
+
+### Step 2: Start Chatting with the Model
+
+1. Select the Martian model you want to use.
+
+The Martian extension is a default extension in the Jan application. All Martian models are automatically installed when you install the Jan application.
+
+2. Specify the model's parameters.
+3. Start the conversation with the Martian model.
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using Martian with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Martian API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/mistralai.mdx b/docs/src/pages/docs/remote-models/mistralai.mdx
new file mode 100644
index 0000000000..5e153dbeca
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/mistralai.mdx
@@ -0,0 +1,65 @@
+---
+title: Mistral AI API
+description: A step-by-step guide on integrating Jan with Mistral AI.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Mistral integration,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Mistral AI API
+
+## How to Integrate Mistral AI with Jan
+This guide provides step-by-step instructions for integrating the Mistral API with Jan, enabling users to utilize Mistral's capabilities within Jan's conversational interface.
+
+Before proceeding, ensure you have the following:
+- Access to the Jan Application
+- Mistral API credentials
+
+## Integration Steps
+
+
+### Step 1: Configure Mistral API Key
+
+1. Obtain the Mistral API Key from your [Mistral](https://console.mistral.ai/user/api-keys/) dashboard.
+2. Copy your **Mistral API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **Mistral**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **Mistral**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **Mistral** section under Model Providers.
+4. Insert your **Mistral API Key**.
+
+
+ - Mistral AI offers various endpoints. Refer to their [endpoint documentation](https://docs.mistral.ai/platform/endpoints/) to select the one that fits your requirements.
+
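+To verify your key outside Jan, you can call Mistral's API directly. A minimal sketch, assuming the standard base URL `https://api.mistral.ai/v1` and that your key is exported as the environment variable `MISTRAL_API_KEY`:
+
+```bash
+# List the models available to your Mistral API key.
+curl https://api.mistral.ai/v1/models \
+  -H "Authorization: Bearer $MISTRAL_API_KEY"
+```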
+
+### Step 2: Start Chatting with the Model
+
+1. Select the Mistral model you want to use.
+
+
+ The Mistral AI extension is a default extension in the Jan application. All Mistral models are automatically installed when you install the Jan application.
+
+2. Specify the model's parameters.
+3. Start the conversation with the Mistral model.
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using Mistral with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Mistral API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/nvidia-nim.mdx b/docs/src/pages/docs/remote-models/nvidia-nim.mdx
new file mode 100644
index 0000000000..2fafa96cda
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/nvidia-nim.mdx
@@ -0,0 +1,60 @@
+---
+title: NVIDIA NIM
+description: Learn how to integrate NVIDIA NIM with Jan for enhanced functionality.
+keywords:
+ [
+ Nvidia API,
+ Jan,
+ Jan AI,
+ ChatGPT alternative,
+ conversational AI,
+ large language model,
+ integration,
+ Nvidia integration,
+ API integration
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# NVIDIA NIM
+
+## How to Integrate NVIDIA NIM with Jan
+
+This guide provides step-by-step instructions on integrating NVIDIA NIM with Jan, enabling users to leverage Nvidia NIM's capabilities within Jan's conversational interface.
+
+
+ The Nvidia NIM extension is only supported on Jan version 0.5.1 or later.
+
+
+Before proceeding, ensure you have the following:
+- Access to the Jan application
+- NVIDIA NIM API credentials
+
+## Integration Steps
+
+### Step 1: Configure Nvidia API Key
+1. Obtain Nvidia API Keys from your [Nvidia dashboard](https://org.ngc.nvidia.com/setup/personal-keys).
+2. Copy your **Nvidia API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **Nvidia**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **Nvidia**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **NVIDIA NIM** section under Model Providers.
+4. Insert your **Nvidia API Key**.
+
+
+### Step 2: Start Chatting with the Model
+
+1. Select the model you want to use.
+2. Specify the model's parameters.
+3. Start the conversation with the model.
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using Nvidia with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Nvidia API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/openai.mdx b/docs/src/pages/docs/remote-models/openai.mdx
new file mode 100644
index 0000000000..cd7d0454ef
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/openai.mdx
@@ -0,0 +1,72 @@
+---
+title: OpenAI API
+description: A step-by-step guide on integrating Jan with OpenAI.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ integration,
+ Azure OpenAI Service,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+
+
+# OpenAI API
+
+
+The OpenAI Extension can be used with any OpenAI-compatible API endpoint.
+
+
+## How to Integrate OpenAI API with Jan
+This guide provides step-by-step instructions for integrating the OpenAI API with Jan, allowing users to utilize OpenAI's capabilities within Jan's conversational interface.
+
+## Integration Steps
+
+### Step 1: Configure OpenAI API Key
+1. Obtain OpenAI API Key from your [OpenAI Platform](https://platform.openai.com/api-keys) dashboard.
+2. Copy your **OpenAI API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **OpenAI**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **OpenAI**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **OpenAI** section under Model Providers.
+4. Insert your **OpenAI API Key**.
+
+### Step 2: Start Chatting with the Model
+
+1. Select the OpenAI model you want to use.
+
+
+The OpenAI extension is a default extension in the Jan application. All OpenAI models are automatically installed when you install the Jan application.
+
+
+2. Specify the model's parameters.
+3. Start the conversation with the OpenAI model.
+
+
+### OpenAI Models
+
+You can also use specific OpenAI models that are not listed in the **Hub** section by customizing the `model.yaml` file located in `~/jan/data/models/`. Follow the steps in [Manage Models](/docs/models/manage-models) to add a model manually.
+
+
+- You can find the list of available models in the [OpenAI Platform](https://platform.openai.com/docs/models/overview).
+- The `id` property must match the model name in the list.
+ - For example, if you want to use the [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo), you must set the `id` property to `gpt-4-1106-preview`.
+
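+One way to confirm the exact `id` value is to list the models available to your key through the OpenAI API. A minimal sketch, assuming your key is exported as the environment variable `OPENAI_API_KEY`:
+
+```bash
+# List models; use one of the returned "id" values in your model configuration.
+curl https://api.openai.com/v1/models \
+  -H "Authorization: Bearer $OPENAI_API_KEY"
+```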
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using OpenAI with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to OpenAI API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/openrouter.mdx b/docs/src/pages/docs/remote-models/openrouter.mdx
new file mode 100644
index 0000000000..8efcd932a0
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/openrouter.mdx
@@ -0,0 +1,55 @@
+---
+title: OpenRouter
+description: A step-by-step guide on integrating Jan with OpenRouter.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ OpenRouter integration,
+ OpenRouter,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# OpenRouter
+
+## Integrate OpenRouter with Jan
+
+[OpenRouter](https://openrouter.ai/docs#quick-start) is a service that aggregates AI models behind a single API. Developers can use it to access diverse large language models, generative image models, and generative 3D object models.
+
+To connect Jan with OpenRouter and access remote Large Language Models (LLMs), follow the steps below:
+
+
+### Step 1: Configure OpenRouter API Key
+
+1. Find your API Key in the [OpenRouter API Key](https://openrouter.ai/keys).
+2. Copy your **OpenRouter API Key**.
+3. There are three ways to configure your API Key in Jan app:
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **My Models** tab > **Add Icon (➕)** next to **OpenRouter**.
+ - Navigate to the **Jan app** > **Thread** > **Model** tab > **Add Icon (➕)** next to **OpenRouter**.
+ - Navigate to the **Jan app** > **Gear Icon (⚙️)** > **OpenRouter** section under Model Providers.
+4. Insert your **OpenRouter API Key**.
+5. For **OpenRouter**, specify the model you want to use, or the system will default to the preset model linked to your **OpenRouter API Key**.
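+
+OpenRouter model IDs are prefixed with the provider name (for example, `mistralai/mistral-7b-instruct`). If you want to confirm your key and a model ID before using them in Jan, you can send a request to OpenRouter directly. This is a minimal sketch, assuming OpenRouter's base URL `https://openrouter.ai/api/v1`, your key exported as `OPENROUTER_API_KEY`, and an example model ID; check OpenRouter's model list for current IDs.
+
+```bash
+# Request one chat completion from OpenRouter with an explicit model ID.
+curl https://openrouter.ai/api/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $OPENROUTER_API_KEY" \
+  -d '{
+    "model": "mistralai/mistral-7b-instruct",
+    "messages": [{"role": "user", "content": "Hello from Jan!"}]
+  }'
+```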
+
+### Step 2: Start Chatting with the Model
+
+1. Select the OpenRouter model you want to use.
+2. Specify the model's parameters.
+3. Start the conversation with the OpenRouter model.
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using OpenRouter with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to OpenRouter API support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/remote-models/triton.mdx b/docs/src/pages/docs/remote-models/triton.mdx
new file mode 100644
index 0000000000..494882c426
--- /dev/null
+++ b/docs/src/pages/docs/remote-models/triton.mdx
@@ -0,0 +1,57 @@
+---
+title: Triton-TRT-LLM
+description: Learn how to integrate Triton-TRT-LLM with Jan for enhanced functionality.
+keywords:
+ [
+ Triton-TRT-LLM API,
+ Jan,
+ Jan AI,
+ ChatGPT alternative,
+ conversational AI,
+ large language model,
+ integration,
+ Triton-TRT-LLM integration,
+ API integration
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Triton-TRT-LLM
+
+## How to Integrate Triton-TRT-LLM with Jan
+
+This guide provides step-by-step instructions on integrating Triton-TRT-LLM with Jan, enabling users to leverage Triton-TRT-LLM's capabilities within Jan's conversational interface.
+
+
+ The Triton-TRT-LLM extension is only supported on Jan version 0.5.1 or later.
+
+
+Before proceeding, ensure you have the following:
+- Access to the Jan application
+- Triton-TRT-LLM credentials
+
+## Integration Steps
+
+### Step 1: Configure Triton-TRT-LLM API Key
+1. Run [Triton-TRT-LLM](https://github.com/npuichigo/openai_trtllm) to obtain the API Keys.
+2. Copy your **Triton-TRT-LLM Key**.
+3. Navigate to the **Jan app** > **Gear Icon (⚙️)** > **Triton-TRT-LLM** section under Model Providers.
+4. Insert your **Triton-TRT-LLM API Key**.
+
+
+### Step 2: Start Chatting with the Model
+
+1. Select the model you want to use.
+2. Specify the model's parameters.
+3. Start the conversation with the model.
+
+
+
+## Troubleshooting
+
+If you encounter any issues during the integration process or while using Triton-TRT-LLM with Jan, consider the following troubleshooting steps:
+
+- Double-check your API credentials to ensure they are correct.
+- Check for error messages or logs that may provide insight into the issue.
+- Reach out to Triton-TRT-LLM support for assistance if needed.
\ No newline at end of file
diff --git a/docs/src/pages/docs/settings.mdx b/docs/src/pages/docs/settings.mdx
new file mode 100644
index 0000000000..80fbb610de
--- /dev/null
+++ b/docs/src/pages/docs/settings.mdx
@@ -0,0 +1,231 @@
+---
+title: Settings
+description: Explore how to adjust the advanced settings of the Jan application to suit your specific requirements.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Advanced Settings,
+ HTTPS Proxy,
+ SSL,
+ settings,
+ Jan settings,
+ ]
+---
+
+import { Tabs, Steps, Callout } from 'nextra/components'
+
+# Settings
+
+This guide will show you how to customize your Jan application settings and advanced settings.
+
+## Settings File
+Settings for the Jan application are stored in a `cortex.db` file located at `~/jan/`, ensuring they persist across sessions. This file contains all user preferences and configurations.
+
+## Customize the UI
+**My Settings** is where you can customize the color scheme of Jan's desktop app UI. Here's how to personalize it:
+1. Navigate to the main dashboard.
+2. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+3. Select the **Appearance** section.
+
+![Settings](./_assets/appearance.png)
+
+4. Pick the **Appearance Scheme** for your Jan desktop app. Options include:
+ - Joi Light
+ - Joi Dark
+ - Dark Dimmed
+ - Night Blue
+
+![Settings](./_assets/scheme.png)
+
+5. Choose the **Interface theme** for your Jan desktop app. Options include:
+ - Solid
+ - Transparent
+
+![Settings](./_assets/theme.png)
+
+## Access the Spell Check
+1. Navigate to the main dashboard.
+2. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+3. Select the **Appearance** section.
+4. Click the **Spell Check** slider to enable it.
+
+![Spell](./_assets/spell.png)
+
+
+## Access Advanced Settings
+Advanced Settings is the GUI counterpart of the `settings.json` file. Whenever you make changes on the Jan application's Settings screen, they are automatically saved to `settings.json`, so your customizations are kept and applied every time the application starts.
+
+To access Jan's advanced settings, follow the steps below:
+
+
+1. Navigate to the main dashboard.
+2. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+3. Click the **Advanced Settings**.
+
+![Settings](./_assets/advance-settings2.png)
+
+4. You can configure the following settings:
+
+| Feature | Description |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| **Experimental Mode** | Enables experimental features that may be unstable. |
+| **GPU Acceleration** | Boosts model performance by using your GPU devices for acceleration. |
+| **HTTPS Proxy** | Use a proxy server for internet connections. Please check out the guide on setting up your HTTPS proxy server [here](settings#https-proxy). |
+| **Ignore SSL Certificates** | Allows the use of self-signed or unverified certificates. |
+| **Migrate Data From Old Version Of Jan App** | Facilitates the transfer of your data from a previous version of the Jan App to the latest version. This feature helps you retain your settings, preferences, and stored information during the upgrade process. |
+
+
+## Enable the Experimental Mode
+
+To try out new features that are still in the testing phase, follow the steps below:
+
+1. Navigate to the **Advanced Settings**.
+2. On the **Experimental Mode**, click the slider to enable.
+
+![Experimental](./_assets/exp-mode.png)
+
+## Enable the GPU Acceleration
+
+To enhance your model performance, follow the steps below:
+
+
+ Ensure you have read the [troubleshooting
+ guide](/docs/troubleshooting#troubleshooting-nvidia-gpu) here for further
+ assistance.
+
+
+1. Navigate to the **Advanced Settings**.
+2. On the **GPU Acceleration**, click the slider to enable.
+
+![Enable GPU](./_assets/gpu-accel.png)
+
+## Enable the Vulkan Support
+
+ This feature is still in the experimental phase.
+
+
+To enable the Vulkan support for AMD or Intel ARC GPU, follow the steps below:
+
+1. Enable the **Experimental Mode**.
+2. Navigate to the **Advanced Settings**.
+3. On the **Vulkan Support**, click the slider to enable.
+
+![Vulkan](./_assets/vulkan.png)
+
+4. Restart the Jan app.
+
+## Enable the Preserve Model Settings
+
+ This feature is still in the experimental phase.
+
+To preserve a thread's model settings and apply them to new threads, follow the steps below:
+
+1. Enable the **Experimental Mode**.
+2. Navigate to the **Advanced Settings**.
+3. On the **Preserve Model Settings**, click the slider to enable.
+
+![Preserve](./_assets/preserve.png)
+
+
+## Access the Jan Data Folder
+
+To access the folder where messages, model configurations, and user data are stored, follow the steps below:
+
+1. Navigate to the **Advanced Settings**.
+
+![Settings](./_assets/advance-set.png)
+
+2. On the **Jan Data Folder** click the **folder icon (📂)** to access the data or the **pencil icon (✏️)** to change the folder where you keep your data.
+
+![Jan Data Folder](./_assets/data-folder.png)
+
+3. You can also access the Jan Data Folder by clicking **System Monitor** > **App Log**.
+
+
+- Uninstalling Jan in Windows and Linux will delete the default Jan Data Folder.
+
+
+## HTTPS Proxy
+
+An HTTPS proxy encrypts data between your client and the internet, making it hard for outsiders to intercept or read it. It also helps you maintain your privacy and security while bypassing regional restrictions on the internet.
+
+
+ - When configuring Jan to use an HTTPS proxy, the model download speed may be affected by the encryption and decryption process. It also depends on the cloud service provider's networking.
+ - HTTPS Proxy does not affect the remote model usage.
+
+
+Once you set up your HTTPS proxy server, follow the steps below:
+
+1. Navigate to **Settings** > **Advanced Settings**.
+2. On the **HTTPS Proxy**, click the slider to enable.
+3. Input your domain in the blank field.
+
+![HTTPS Proxy](./_assets/http.png)
+
+
+## Ignore SSL Certificate
+
+To allow self-signed or unverified certificates, follow the steps below:
+
+1. Navigate to the **Advanced Settings**.
+2. On the **Ignore SSL Certificates**, click the slider to enable.
+
+![Ignore SSL](./_assets/ssl.png)
+
+## Enable the Jan Quick Ask
+
+ This feature is still in the experimental phase.
+
+To enable the Jan quick ask mode, follow the steps below:
+
+1. Enable the **Experimental Mode**.
+2. Navigate to the **Advanced Settings**.
+3. On the **Quick Ask**, click the slider to enable.
+
+![Quick Ask](./_assets/quick-ask.png)
+
+4. Restart the Jan app.
+
+## Clear Logs
+
+To clear all logs on your Jan app, follow the steps below:
+
+
+ This feature clears all the data in your **Jan Data Folder**.
+
+
+1. Navigate to the **Advanced Settings**.
+2. On the **Clear Logs**, click the **Clear** button.
+
+![Clear Logs](./_assets/clear-logs.png)
+
+## Reset To Factory Default
+
+To reset the Jan app to its original state, follow the steps below:
+
+
+ This irreversible action is only recommended if the application is corrupted.
+
+
+
+1. Navigate to the **Advanced Settings**.
+2. On the **Reset To Factory Default**, click the **Reset** button.
+
+![Reset](./_assets/reset-jan.png)
+
diff --git a/docs/src/pages/docs/shortcuts.mdx b/docs/src/pages/docs/shortcuts.mdx
new file mode 100644
index 0000000000..3905a6f8ca
--- /dev/null
+++ b/docs/src/pages/docs/shortcuts.mdx
@@ -0,0 +1,83 @@
+---
+title: Keyboard Shortcuts
+description: Lists all the available keyboard shortcuts for Windows, Mac, and Linux.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Advanced Settings,
+ HTTPS Proxy,
+ SSL,
+ settings,
+ Jan settings,
+ ]
+---
+
+import { Tabs, Steps, Callout } from 'nextra/components'
+
+## Keyboard Shortcuts
+To find the list of all available shortcuts in the Jan app, follow the steps below:
+1. Navigate to the main dashboard.
+2. Click the **Gear Icon (⚙️)** on the bottom left of your screen.
+
+![Settings](./_assets/settings.png)
+
+3. Click **Hotkey & Shortcut**.
+
+![Keyboard Shortcut](./_assets/shortcut.png)
+
+
+Here are some of the keyboard shortcuts that you can use in Jan.
+
+
+
+ | Combination | Description |
+ | --------------- | -------------------------------------------------- |
+ | `⌘ N` | Create a new thread. |
+ | `⌘ B` | Toggle collapsible left panel. |
+ | `⌘ Shift B` | Toggle collapsible right panel. |
+ | `⌘ ,` | Navigate to the settings page. |
+ | `Enter` | Send a message. |
+ | `Shift + Enter` | Insert new line in input box. |
+ | `Arrow Up` | Navigate to the previous option (within the search dialog). |
+ | `Arrow Down` | Navigate to the next option (within the search dialog). |
+
+
+
+
+ | Combination | Description |
+ | --------------- | ---------------------------------------------------------- |
+ | `Ctrl N` | Create a new thread. |
+ | `Ctrl B` | Toggle collapsible left panel. |
+ | `Ctrl Shift B` | Toggle collapsible right panel. |
+ | `Ctrl ,` | Navigate to the settings page. |
+ | `Enter` | Send a message. |
+ | `Shift + Enter` | Insert new line in input box. |
+ | `Arrow Up` | Navigate to the previous option (within the search dialog). |
+ | `Arrow Down` | Navigate to the next option (within the search dialog). |
+
+
+
+
+ | Combination | Description |
+ | --------------- | ---------------------------------------------------------- |
+ | `Ctrl N` | Create a new thread. |
+ | `Ctrl B` | Toggle collapsible left panel. |
+ | `Ctrl Shift B` | Toggle collapsible right panel. |
+ | `Ctrl ,` | Navigate to the settings page. |
+ | `Enter` | Send a message. |
+ | `Shift + Enter` | Insert new line in input box. |
+ | `Arrow Up` | Navigate to the previous option (within the search dialog). |
+ | `Arrow Down` | Navigate to the next option (within the search dialog). |
+
+
+
+
+
diff --git a/docs/src/pages/docs/threads.mdx b/docs/src/pages/docs/threads.mdx
new file mode 100644
index 0000000000..9fea52d668
--- /dev/null
+++ b/docs/src/pages/docs/threads.mdx
@@ -0,0 +1,73 @@
+---
+title: Using Threads
+description: Manage your interaction with AI locally.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ threads,
+ chat history,
+ thread history,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+
+# Using Threads
+
+Jan provides a straightforward and private solution for managing your threads with AI on your device. As you interact with AI using Jan, you'll accumulate a history of threads.
+Jan offers easy tools to organize, delete, or review your past threads with AI. This guide will show you how to keep your threads private and well-organized.
+
+## View Thread History
+
+To view your thread history, follow the steps below:
+
+1. Navigate to the main dashboard.
+2. Locate the list of threads screen on the left side.
+3. To view a specific thread, choose the one you're interested in and then scroll up or down to explore the entire conversation.
+
+![History](./_assets/history.png)
+
+## Change the Thread's Title
+To change a thread's title, follow the steps below:
+
+1. Navigate to the Thread that you want to edit.
+2. Hover over the thread and click the **three dots (⋮)** in the Thread section.
+3. Select the **Edit Title** button.
+
+![Edit Title](./_assets/title.png)
+
+## Clean Threads History
+
+To clean all the messages from a thread, follow the steps below:
+
+1. Navigate to the Thread that you want to clean.
+2. Hover over the thread and click the **three dots (⋮)** in the Thread section.
+3. Select the **Clean Thread** button.
+
+![Clean Thread](./_assets/clean.png)
+
+
+ This will delete all messages in the thread while keeping the thread settings.
+
+
+## Delete Threads History
+
+To delete a thread, follow the steps below:
+
+1. Navigate to the Thread that you want to delete.
+2. Hover over the thread and click the **three dots (⋮)** in the Thread section.
+3. Select the **Delete Thread** button.
+
+![Delete Thread](./_assets/delete-threads.png)
+
+
+ This will delete all messages and the thread settings.
+
diff --git a/docs/src/pages/docs/tools/retrieval.mdx b/docs/src/pages/docs/tools/retrieval.mdx
new file mode 100644
index 0000000000..c0276d8ecd
--- /dev/null
+++ b/docs/src/pages/docs/tools/retrieval.mdx
@@ -0,0 +1,61 @@
+---
+title: Knowledge Retrieval
+description: A step-by-step guide to chat with PDF documents.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ use tools,
+ rag,
+ retrieval,
+ chat with pdf,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Knowledge Retrieval
+This guide describes Jan's Knowledge Retrieval capabilities and walks you through using Retrieval-Augmented Generation (RAG) to chat with PDF documents.
+
+To access this feature, please enable Experimental mode in the [Advanced Settings](/guides/advanced/#enable-the-experimental-mode).
+
+
+## Enable the Knowledge Retrieval
+
+To chat with PDFs using RAG in Jan, follow these steps:
+
+1. Create a **new thread**.
+2. Click the **Tools** tab.
+
+![Retrieval](../_assets/tools.png)
+
+3. Enable **Retrieval**.
+
+![Retrieval](../_assets/retrieval1.png)
+
+4. Adjust the **Retrieval** settings as needed. These settings include the following:
+
+| Feature | Description |
+|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| **Retrieval** | - Utilizes information from uploaded files, automatically retrieving content relevant to your queries for enhanced interaction. - Use this for complex inquiries where context from uploaded documents significantly enhances response quality. |
+| **Embedding Model** | - Converts text into numerical representations for machine understanding. - Choose a model based on your needs and available resources, balancing accuracy and computational efficiency. |
+| **Vector Database** | - Facilitates quick searches through stored numerical text representations to find relevant information efficiently. - Optimize your vector database settings to ensure quick retrieval without sacrificing accuracy, particularly in applications with large data sets. |
+| **Top K** | - Determines the number of top-ranked documents to retrieve, allowing control over search result relevance. - Adjust this setting based on the precision needed. A lower value for more precise, focused searches and a higher value for broader, more comprehensive searches. |
+| **Chunk Size** | - Sets the maximum number of tokens per data chunk, which is crucial for managing processing load and maintaining performance. - Increase the chunk size for processing large blocks of text efficiently, or decrease it when dealing with smaller, more manageable texts to optimize memory usage. |
+| **Chunk Overlap** | - Specifies the overlap in tokens between adjacent chunks to ensure continuous context in split text segments. - Adjust the overlap to ensure smooth transitions in text analysis, with higher overlap for complex texts where context is critical. |
+| **Retrieval Template**| - Defines the query structure using variables like `{CONTEXT}` and `{QUESTION}` to tailor searches to specific needs. - Customize templates to closely align with your data's structure and the queries' nature, ensuring that retrievals are as relevant as possible. An illustrative example is shown at the end of this page. |
+5. Select the model you want to use.
+
+To upload an image or GIF, ensure that you are using a multimodal model. If not, you are limited to uploading documents only.
+
+6. Click on the 📎 icon in the chat input field.
+7. Select **Document** to upload a document file.
+
+![Retrieval](../_assets/retrieval2.png)
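+
+As a reference for the **Retrieval Template** setting above, here is an illustrative sketch of a template that combines the retrieved context with your question. It is not Jan's built-in default, so adapt it to your documents and queries:
+
+```
+Use the following context to answer the question at the end.
+If the answer is not in the context, say that you don't know.
+
+Context:
+{CONTEXT}
+
+Question: {QUESTION}
+
+Answer:
+```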
diff --git a/docs/src/pages/docs/troubleshooting.mdx b/docs/src/pages/docs/troubleshooting.mdx
new file mode 100644
index 0000000000..d6725b2881
--- /dev/null
+++ b/docs/src/pages/docs/troubleshooting.mdx
@@ -0,0 +1,416 @@
+---
+title: Troubleshooting
+description: Explore solutions for common issues and optimize Jan's performance with this comprehensive troubleshooting guide.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ troubleshooting,
+ error codes,
+ broken build,
+ something amiss,
+ unexpected token,
+ undefined issue,
+ permission denied,
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout } from 'nextra/components'
+
+# Troubleshooting
+
+## Broken Build
+
+To resolve the issue where Jan is stuck in a broken build after installation, follow the steps below.
+
+
+
+ 1. Uninstall Jan.
+
+ 2. Delete Application Data, Cache, and User Data:
+
+ ```zsh
+ # Step 1: Delete the application data
+ rm -rf ~/Library/Application\ Support/jan/data
+
+ # Step 2: Clear application cache
+ rm -rf ~/Library/Application\ Support/Jan/cache
+
+ # Step 3: Remove all user data
+ rm -rf ~/jan
+ ```
+
+ 3. If you are using a version before `0.4.2`, you need to run the following commands:
+
+ ```zsh
+ ps aux | grep nitro
+ # Look for processes like `nitro` and `nitro_arm_64`, and kill each one by its process ID
+ kill -9 <PID>
+ ```
+
+ 4. Download the latest version of Jan from our [homepage](https://jan.ai/).
+
+
+
+ 1. Uninstall Jan on Windows, by using the [Windows Control Panel](https://support.microsoft.com/en-us/windows/uninstall-or-remove-apps-and-programs-in-windows-4b55f974-2cc6-2d2b-d092-5905080eaf98).
+
+ 2. Delete Application Data, Cache, and User Data:
+
+ ```bash
+ # You can delete the `/Jan` directory in Windows's AppData Directory by visiting the following path `%APPDATA%\Jan`
+ cd C:\Users\%USERNAME%\AppData\Roaming
+ rmdir /S jan
+ ```
+
+ 3. If you are using a version before `0.4.2`, you need to run the following commands:
+
+ ```bash
+ # Find the process ID (PID) of the nitro process by filtering the list by process name
+ tasklist | findstr "nitro"
+ # Once you have the PID of the process you want to terminate, run `taskkill`
+ taskkill /F /PID <PID>
+ ```
+
+ 4. Download the latest version of Jan from our [homepage](https://jan.ai/).
+
+
+
+ 1. Uninstall Jan
+
+
+
+ To uninstall Jan, use your package manager's uninstall or remove option.
+
+ This will return your system to its state before Jan was installed.
+
+ This method can also reset all settings if you are experiencing any issues with Jan.
+
+
+
+
+ To uninstall Jan, run the following command:
+
+ ```bash
+ sudo apt-get remove jan
+ # where `jan` is the name of Jan's package
+ ```
+
+ This will return your system to its state before Jan was installed.
+
+ This method can also reset all settings if you are experiencing any issues with Jan.
+
+
+
+
+ You can uninstall Jan by deleting the `.AppImage` file.
+
+ If you wish to remove all user data associated with Jan after uninstallation, you can delete the user data at `~/jan`.
+
+ This method can also reset all settings if you are experiencing any issues with Jan.
+
+
+
+
+ 2. Delete Application Data, Cache, and User Data:
+
+ ```bash
+ # You can delete the user data folders located at the following `~/jan`
+ rm -rf ~/jan
+ ```
+
+ 3. If you are using a version before `0.4.2`, you need to run the following commands:
+
+ ```zsh
+ ps aux | grep nitro
+ # Look for processes like `nitro` and `nitro_arm_64`, and kill each one by its process ID
+ kill -9 <PID>
+ ```
+
+ 4. Download the latest version of Jan from our [homepage](https://jan.ai/).
+
+
+
+
+Following these steps, you can cleanly uninstall and reinstall Jan, ensuring a smooth and error-free experience with the latest version.
+
+
+ Before reinstalling Jan, ensure it's completely removed from all shared spaces if installed on multiple user accounts on your device.
+
+
+## Troubleshooting NVIDIA GPU
+
+To resolve issues when the Jan app does not utilize the NVIDIA GPU on Windows and Linux systems.
+
+#### 1. Ensure GPU Mode Requirements
+
+
+
+
+ ##### NVIDIA Driver
+
+ - Install an [NVIDIA Driver](https://www.nvidia.com/Download/index.aspx) supporting CUDA 11.7 or higher.
+ - Use the following command to verify the installation:
+
+ ```bash
+ nvidia-smi
+ ```
+
+ ##### CUDA Toolkit
+
+ - Install a [CUDA toolkit](https://developer.nvidia.com/cuda-downloads) compatible with your NVIDIA driver.
+ - Use the following command to verify the installation:
+
+ ```bash
+ nvcc --version
+ ```
+
+
+
+
+ ##### NVIDIA Driver
+
+ - Install an [NVIDIA Driver](https://www.nvidia.com/Download/index.aspx) supporting CUDA 11.7 or higher.
+ - Use the following command to verify the installation:
+
+ ```bash
+ nvidia-smi
+ ```
+
+ ##### CUDA Toolkit
+
+ - Install a [CUDA toolkit](https://developer.nvidia.com/cuda-downloads) compatible with your NVIDIA driver.
+ - Use the following command to verify the installation:
+
+ ```bash
+ nvcc --version
+ ```
+ ##### Linux Specifics
+
+ - Ensure that `gcc-11`, `g++-11`, `cpp-11`, or higher is installed.
+ - See [instructions](https://gcc.gnu.org/projects/cxx-status.html#cxx17) for Ubuntu installation.
+
+ - **Post-Installation Actions**: Add CUDA libraries to `LD_LIBRARY_PATH`.
+ - Follow the [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions) instructions.
+
+
+
+
+#### 2. Switch to GPU Mode
+
+Jan defaults to CPU mode but automatically switches to GPU mode if your system supports it, selecting the GPU with the highest VRAM. You can check this setting in `Settings` > `Advanced Settings`.
+
+##### Troubleshooting Tips
+
+If GPU mode isn't enabled by default:
+
+1. Confirm that you have installed an NVIDIA driver supporting CUDA 11.7 or higher. Refer to [CUDA compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver).
+2. Ensure compatibility of the CUDA toolkit with your NVIDIA driver. Refer to [CUDA compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver).
+3. Add CUDA's `.so` libraries to the `LD_LIBRARY_PATH` for Linux. Ensure that CUDA's `.dll` libraries are in the PATH for Windows. Refer to [Windows setup](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#environment-setup).
+
+
+If you encounter an error message indicating that loading a model requires additional dependencies, follow these steps:
+
+1. Click on **Install Additional Dependencies** in the error message.
+2. You will be redirected to the **Tensor RT LLM Inference Engine** section.
+3. Click the **Install** button to install additional dependencies.
+
+#### 3. Check GPU Settings
+
+1. Navigate to `Settings` > `Advanced Settings` > `Jan Data Folder` to access GPU settings.
+2. Open the `settings.json` file in the `settings` folder. Here's an example:
+
+```json title="~/jan/data/settings/settings.json"
+{
+ "notify": true,
+ "run_mode": "gpu",
+ "nvidia_driver": {
+ "exist": true,
+ "version": "531.18"
+ },
+ "cuda": {
+ "exist": true,
+ "version": "12"
+ },
+ "gpus": [
+ {
+ "id": "0",
+ "vram": "12282"
+ },
+ {
+ "id": "1",
+ "vram": "6144"
+ },
+ {
+ "id": "2",
+ "vram": "6144"
+ }
+ ],
+ "gpu_highest_vram": "0"
+}
+```
+
+#### 4. Restart Jan
+
+Restart the Jan application to apply the changes and confirm that GPU mode is working.
+
+##### Troubleshooting Tips
+
+- Ensure the `nvidia_driver` and `cuda` fields indicate installed software.
+- If the `gpus` field is empty or lacks your GPU, check the NVIDIA driver and CUDA toolkit installations.
+- For further assistance, share the `settings.json` file.
+
+#### Tested Configurations
+
+- **Windows 11 Pro 64-bit:**
+
+ - GPU: NVIDIA GeForce RTX 4070ti
+ - CUDA: 12.2
+ - NVIDIA driver: 531.18 (Bare metal)
+
+- **Ubuntu 22.04 LTS:**
+
+ - GPU: NVIDIA GeForce RTX 4070ti
+ - CUDA: 12.2
+ - NVIDIA driver: 545 (Bare metal)
+
+- **Ubuntu 20.04 LTS:**
+
+ - GPU: NVIDIA GeForce GTX 1660ti
+ - CUDA: 12.1
+ - NVIDIA driver: 535 (Proxmox VM passthrough GPU)
+
+- **Ubuntu 18.04 LTS:**
+ - GPU: NVIDIA GeForce GTX 1660ti
+ - CUDA: 12.1
+ - NVIDIA driver: 535 (Proxmox VM passthrough GPU)
+
+#### Common Issues and Solutions
+
+1. If the issue persists, install the [Nightly version](/guides/quickstart/#nightly-releases).
+2. Ensure your (V)RAM is accessible; some users with virtual RAM may require additional configuration.
+3. Seek assistance in [Jan Discord](https://discord.gg/mY69SZaMaC).
+
+## How to Get Error Logs
+
+To get the error logs of your Jan application, follow the steps below:
+
+#### Jan Application
+
+1. Navigate to the main dashboard.
+2. Click the **gear icon (⚙️)** on the bottom left of your screen.
+3. Under the **Settings screen**, click the **Advanced Settings**.
+4. On the **Jan Data Folder** click the **folder icon (📂)** to access the data.
+5. Click the **logs** folder.
+
+#### Jan UI
+
+1. Open your Unix or Linux terminal.
+2. Use the following commands to get the recent 50 lines of log files:
+
+```bash
+tail -n 50 ~/jan/data/logs/app.log
+
+```
+
+#### Jan API Server
+
+1. Open your Unix or Linux terminal.
+2. Use the following commands to get the recent 50 lines of log files:
+
+```bash
+tail -n 50 ~/jan/data/logs/server.log
+
+```
+
+
+Be sure to redact any private or sensitive information when sharing logs or error details.
+
+
+## Permission Denied
+
+When running Jan, you might encounter the following error message:
+
+```
+Uncaught (in promise) Error: Error invoking layout-480796bff433a3a3.js:538 remote method 'installExtension':
+Error Package /Applications/Jan.app/Contents/Resources/app.asar.unpacked/pre-install/janhq-assistant-extension-1.0.0.tgz does not contain a valid manifest:
+Error EACCES: permission denied, mkdtemp '/Users/username/.npm/_cacache/tmp/ueCMn4'
+```
+
+This error is mainly caused by permission problems during installation. To resolve it, follow these steps:
+
+1. Open your terminal.
+
+2. Execute the following command to change ownership of the `~/.npm` directory to the current user:
+
+```bash
+sudo chown -R $(whoami) ~/.npm
+```
+
+
+This command ensures that the necessary permissions are granted for Jan's installation, resolving the encountered error.
+
+
+## Something's Amiss
+
+If you encounter a `Something's Amiss` error when starting a chat with a model, here's how to resolve it:
+
+1. Ensure your OS is up to date.
+2. Choose a model smaller than 80% of your hardware's V/RAM. For example, on an 8GB machine, opt for models smaller than 6 GB.
+3. Install the latest [Nightly release](/guides/quickstart/#nightly-releases) or [clear the application cache](#broken-build) when reinstalling Jan.
+4. Confirm your V/RAM accessibility, mainly if using virtual RAM.
+5. Nvidia GPU users should download [CUDA](https://developer.nvidia.com/cuda-downloads).
+6. Linux users, ensure your system meets the requirements of gcc 11, g++ 11, cpp 11, or higher. Refer to this [link](#troubleshooting-nvidia-gpu) for details.
+7. If you [check the app logs](#how-to-get-error-logs) and see a `Bind address failed at 127.0.0.1:3928` error, the port may already be in use. To check the port status, use the `netstat` command as shown below:
+
+
+
+ ```bash
+ netstat -an | grep 3928
+ ```
+
+
+ ```bash
+ netstat -ano | find "3928"
+ tasklist /fi "PID eq 3928"
+ ```
+
+
+ ```bash
+ netstat -anpe | grep "3928"
+ ```
+
+
+
+
+ `Netstat` displays the contents of various network-related data structures for active connections.
+
+
+
+ Jan uses the following ports:
+ - Jan and Cortex API Server: `1337`
+ - Jan Documentation: `3001`
+
+
+## Undefined Issue
+If you experience an undefined or unusual issue, please follow the steps below:
+1. Delete Jan's extension folder located at `~/jan/data`.
+2. Restart the Jan application.
+## Unexpected Token
+
+The `Unexpected token` error when initiating a chat with OpenAI models is mainly caused by an invalid OpenAI API key or the region you access OpenAI from. This issue can be solved through the following steps:
+
+1. Obtain an OpenAI API key from [OpenAI's developer platform](https://platform.openai.com/) and integrate it into your application.
+
+2. Using a VPN could potentially solve the issue, especially if it's related to region locking for accessing OpenAI services. Connecting through a VPN may bypass such restrictions and successfully initiate chats with OpenAI models.
+
+
+ If you have any questions or are looking for support, please don't hesitate to contact us via our [Discord community](https://discord.gg/Dt7MxDyNNZ) or create a new issue in our [GitHub repository](https://github.com/janhq/jan/issues/new/choose).
+
diff --git a/docs/src/pages/download.mdx b/docs/src/pages/download.mdx
new file mode 100644
index 0000000000..54f68001f1
--- /dev/null
+++ b/docs/src/pages/download.mdx
@@ -0,0 +1,34 @@
+---
+title: Download
+description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ architecture,
+ ]
+---
+
+import Download from "@/components/Download"
+
+export const getStaticProps = async() => {
+ const resRelease = await fetch('https://api.github.com/repos/janhq/jan/releases/latest')
+ const release = await resRelease.json()
+
+ return {
+ props: {
+ ssg: {
+ lastRelease: release,
+ },
+ },
+ }
+}
+
+
\ No newline at end of file
diff --git a/docs/src/pages/index.mdx b/docs/src/pages/index.mdx
new file mode 100644
index 0000000000..43c0875d76
--- /dev/null
+++ b/docs/src/pages/index.mdx
@@ -0,0 +1,41 @@
+---
+title: "Jan: Open source ChatGPT-alternative that runs 100% offline"
+description: "Chat with AI without privact concerns. Jan is an open-source alternative to ChatGPT, running AI models locally on your device."
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ architecture,
+ ]
+---
+
+import Home from "@/components/Home"
+
+export const getStaticProps = async() => {
+ const resReleaseLatest = await fetch('https://api.github.com/repos/janhq/jan/releases/latest')
+ const resRelease = await fetch('https://api.github.com/repos/janhq/jan/releases')
+ const resRepo = await fetch('https://api.github.com/repos/janhq/jan')
+ const repo = await resRepo.json()
+ const latestRelease = await resReleaseLatest.json()
+ const release = await resRelease.json()
+
+ return {
+ props: {
+ ssg: {
+ stars: repo.stargazers_count || 2000,
+ lastRelease: latestRelease,
+ release: release,
+ lastVersion: latestRelease.tag_name || 'v.0.0.1'
+ },
+ },
+ }
+}
+
+
diff --git a/docs/src/pages/integrations/_meta.json b/docs/src/pages/integrations/_meta.json
new file mode 100644
index 0000000000..e00ad9deaa
--- /dev/null
+++ b/docs/src/pages/integrations/_meta.json
@@ -0,0 +1,11 @@
+{
+ "-- Switcher": {
+ "type": "separator",
+ "title": "Switcher"
+ },
+ "index": { "title": "Overview", "href": "/integrations", "display": "hidden" },
+ "coding": "Coding",
+ "function-calling": "Function Calling",
+ "messaging": "Messaging",
+ "workflow-automation": "Workflow Automation"
+}
diff --git a/docs/src/pages/integrations/coding/continue-dev.mdx b/docs/src/pages/integrations/coding/continue-dev.mdx
new file mode 100644
index 0000000000..c6b9d50a88
--- /dev/null
+++ b/docs/src/pages/integrations/coding/continue-dev.mdx
@@ -0,0 +1,107 @@
+---
+title: Continue.dev
+description: A step-by-step guide on integrating Jan with Continue and VS Code.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Continue integration,
+ VSCode integration,
+ ]
+---
+
+import { Tabs, Steps } from 'nextra/components'
+
+# Continue.dev
+
+## Integrate with Continue VS Code
+
+[Continue](https://continue.dev/docs/intro) is an open-source autopilot compatible with Visual Studio Code and JetBrains, offering the simplest way to code with any LLM (Large Language Model).
+
+To integrate Jan with a local AI language model, follow the steps below:
+
+
+### Step 1: Installing Continue on Visual Studio Code
+
+Follow this [guide](https://continue.dev/docs/quickstart) to install the Continue extension on Visual Studio Code.
+
+### Step 2: Enable the Jan API Server
+
+To set up Continue for use with Jan's Local Server, you must activate the Jan API Server with your chosen model.
+
+1. Press the `<>` button. Jan will take you to the **Local API Server** section.
+
+2. Set up the server, including the **IP Port**, **Cross-Origin-Resource-Sharing (CORS)**, and **Verbose Server Logs**.
+
+3. Press the **Start Server** button.
+
+### Step 3: Configure Continue to Use Jan's Local Server
+
+1. Go to the `~/.continue` directory and open the `config.json` file.
+
+
+
+ ```bash
+ cd ~/.continue
+ ```
+
+
+ ```bash
+ C:/Users/<username>/.continue
+ ```
+
+
+ ```bash
+ cd ~/.continue
+ ```
+
+
+
+```json title="~/.continue/config.json"
+{
+ "models": [
+ {
+ "title": "Jan",
+ "provider": "openai",
+ "model": "mistral-ins-7b-q4",
+ "apiKey": "EMPTY",
+ "apiBase": "http://localhost:1337/v1"
+ }
+ ]
+}
+```
+
+2. Ensure the file has the following configurations:
+   - Ensure `openai` is selected as the `provider`.
+   - Match the `model` with the one enabled in the Jan API Server.
+   - Set `apiBase` to `http://localhost:1337/v1`.
+   - Leave the `apiKey` field set to `EMPTY`.
+
+### Step 4: Ensure the Using Model Is Activated in Jan
+
+1. Navigate to `Settings` > `My Models`.
+2. Click the **three dots (⋮)** button.
+3. Select the **Start Model** button to activate the model.
+
+
+
+## How to Use Jan Integration with Continue in Visual Studio Code
+
+### 1. Exploring Code with Jan
+
+1. Highlight a code.
+2. Press `Command + Shift + M` to open the Left Panel.
+3. Click "Jan" at the bottom of the panel and submit your query, such as `Explain this code`.
+
+### 2. Enhancing Code with the Help of a Large Language Model
+
+1. Select a code snippet.
+2. Press `Command + Shift + L`.
+3. Type in your specific request, for example, `Add comments to this code`.
diff --git a/docs/src/pages/integrations/function-calling/interpreter.mdx b/docs/src/pages/integrations/function-calling/interpreter.mdx
new file mode 100644
index 0000000000..92e3f41b8b
--- /dev/null
+++ b/docs/src/pages/integrations/function-calling/interpreter.mdx
@@ -0,0 +1,70 @@
+---
+title: Open Interpreter
+description: A step-by-step guide on integrating Jan with Open Interpreter.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Open Interpreter integration,
+ Open Interpreter,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# Open Interpreter
+
+## Integrate Open Interpreter with Jan
+
+[Open Interpreter](https://github.com/KillianLucas/open-interpreter/) lets LLMs run code (Python, JavaScript, Shell, and more) locally. After installing, you can chat with Open Interpreter through a ChatGPT-like interface in your terminal by running `interpreter`. To integrate Open Interpreter with Jan, follow the steps below:
+
+
+
+### Step 1: Install Open Interpreter
+
+1. Install Open Interpreter by running:
+
+```bash
+pip install open-interpreter
+```
+
+2. A Rust compiler is required to build some of Open Interpreter's dependencies. If it is not already installed, run the following command (for Debian/Ubuntu), or go to [this page](https://rustup.rs/) if you are running on Windows or another platform:
+
+```bash
+sudo apt install rustc
+```
+
+
+ The Rust compiler is necessary for building some native extensions that Open Interpreter requires.
+
+
+### Step 2: Configure Jan's Local API Server
+
+Before using Open Interpreter, configure the model in Jan's `Settings` > `My Models` and activate its local API server.
+
+#### Enabling Jan API Server
+
+1. Click the `<>` button to access the **Local API Server** section in Jan.
+
+2. Configure the server settings, including **IP Port**, **Cross-Origin-Resource-Sharing (CORS)**, and **Verbose Server Logs**.
+
+3. Click **Start Server**.
+
+### Step 3: Set the Open Interpreter Environment
+
+1. For integration, provide the API Base (`http://localhost:1337/v1`) and the model ID (e.g., `mistral-ins-7b-q4`) when running Open Interpreter. For example, see the code below:
+
+```zsh
+interpreter --api_base http://localhost:1337/v1 --model mistral-ins-7b-q4
+```
+
+> **Open Interpreter is now ready for use!**
+
+
\ No newline at end of file
diff --git a/docs/src/pages/integrations/index.mdx b/docs/src/pages/integrations/index.mdx
new file mode 100644
index 0000000000..965779fd50
--- /dev/null
+++ b/docs/src/pages/integrations/index.mdx
@@ -0,0 +1,29 @@
+---
+title: Discord
+description: A step-by-step guide on integrating Jan with a Discord bot.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Discord integration,
+ Discord,
+ bot,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+import { Cards, Card } from 'nextra/components'
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/src/pages/integrations/messaging/llmcord.mdx b/docs/src/pages/integrations/messaging/llmcord.mdx
new file mode 100644
index 0000000000..bffff90d40
--- /dev/null
+++ b/docs/src/pages/integrations/messaging/llmcord.mdx
@@ -0,0 +1,78 @@
+---
+title: llmcord (Discord)
+description: A step-by-step guide on integrating Jan with a Discord bot.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Discord integration,
+ Discord,
+ bot,
+ ]
+---
+
+import { Callout, Steps } from 'nextra/components'
+
+# llmcord (Discord)
+
+## Integrate llmcord.py with Jan
+
+[llmcord.py](https://github.com/jakobdylanc/discord-llm-chatbot) lets you and your friends chat with LLMs directly in your Discord server.
+
+To integrate Jan with llmcord.py, follow the steps below:
+
+
+
+### Step 1: Clone the Repository
+
+Clone the discord bot's [repository](https://github.com/jakobdylanc/discord-llm-chatbot) by using the following command:
+```bash
+git clone https://github.com/jakobdylanc/discord-llm-chatbot.git
+```
+
+### Step 2: Install the Required Libraries
+
+After cloning the repository, run the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+
+ A valid Python installation is required.
+
+
+### Step 3: Set the Environment
+
+1. Create a copy of `.env.example`.
+2. Change the name to `.env`.
+3. Set the environment with the following options:
+
+| Setting | Instructions |
+| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `DISCORD_BOT_TOKEN` | Create a new Discord bot at [discord.com/developers/applications](https://discord.com/developers/applications), obtain a token from the Bot tab, and enable MESSAGE CONTENT INTENT. |
+| `DISCORD_CLIENT_ID` | Found under the OAuth2 tab of the Discord bot you just made. |
+| `LLM` | For Jan, set to `local/openai/(MODEL_NAME)`, where `(MODEL_NAME)` is your loaded model's name. |
+| `LLM_SYSTEM_PROMPT` | Adjust the bot's behavior as needed. |
+| `LOCAL_SERVER_URL` | URL of your local API server. For Jan, set it to `http://localhost:1337/v1`. |
+
+For more configuration options, refer to llmcord.py's [README](https://github.com/jakobdylanc/discord-llm-chatbot/tree/main?tab=readme-ov-file#instructions).
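+
+Putting it together, a `.env` for pointing llmcord.py at Jan's local server might look like the sketch below; the token and client ID are placeholders, and the model name should match the model you actually loaded in Jan:
+
+```bash
+# .env — example values only
+DISCORD_BOT_TOKEN=your-bot-token-here
+DISCORD_CLIENT_ID=your-client-id-here
+LLM=local/openai/mistral-ins-7b-q4
+LLM_SYSTEM_PROMPT="You are a helpful Discord assistant."
+LOCAL_SERVER_URL=http://localhost:1337/v1
+```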
+
+### Step 4: Run the Bot
+
+Run the bot by using the following command in your command prompt:
+
+```bash
+python llmcord.py
+```
+
+The bot's invite URL will be printed in the console. Use it to add the bot to your server.
+
+
\ No newline at end of file
diff --git a/docs/src/pages/integrations/workflow-automation/raycast.mdx b/docs/src/pages/integrations/workflow-automation/raycast.mdx
new file mode 100644
index 0000000000..01c5a4866e
--- /dev/null
+++ b/docs/src/pages/integrations/workflow-automation/raycast.mdx
@@ -0,0 +1,47 @@
+---
+title: Raycast
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ raycast integration,
+ Raycast,
+ ]
+description: A step-by-step guide on integrating Jan with Raycast.
+---
+
+import { Steps } from 'nextra/components'
+
+# Raycast
+
+## Integrate Raycast with Jan
+
+[Raycast](https://www.raycast.com/) is a productivity tool designed for macOS that enhances workflow efficiency by providing quick access to various tasks and functionalities through a keyboard-driven interface. To integrate Raycast with Jan, follow the steps below:
+
+
+### Step 1: Download the TinyLlama Model
+
+1. Open the Jan app.
+2. Go to the **Hub** and download the TinyLlama model.
+3. The model will be available at `~/jan/models/tinyllama-1.1b` (you can verify this with the listing below).
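+
+You can verify the download with a quick directory listing. The path below assumes Jan's default data folder; adjust it if you changed the folder location in Jan's settings:
+
+```bash
+# Check that the TinyLlama model files are in Jan's data folder
+ls ~/jan/models/tinyllama-1.1b
+```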
+
+### Step 2: Clone and Run the Program
+
+1. Clone this [GitHub repository](https://github.com/InNoobWeTrust/nitro-raycast).
+2. Execute the project using the following command:
+
+```bash title="Node.js"
+npm i && npm run dev
+```
+
+### Step 3: Search for Nitro and Run the Model
+
+Search for `Nitro` in Raycast, and you can then use the models from Jan directly in Raycast.
+
\ No newline at end of file
diff --git a/docs/src/pages/post/_assets/3090s.jpg b/docs/src/pages/post/_assets/3090s.jpg
new file mode 100644
index 0000000000..3a62b3f6f6
Binary files /dev/null and b/docs/src/pages/post/_assets/3090s.jpg differ
diff --git a/docs/src/pages/post/_assets/4070s.jpg b/docs/src/pages/post/_assets/4070s.jpg
new file mode 100644
index 0000000000..3d86223470
Binary files /dev/null and b/docs/src/pages/post/_assets/4070s.jpg differ
diff --git a/docs/src/pages/post/_assets/4090s.png b/docs/src/pages/post/_assets/4090s.png
new file mode 100644
index 0000000000..2c49a3248a
Binary files /dev/null and b/docs/src/pages/post/_assets/4090s.png differ
diff --git a/docs/src/pages/post/_assets/catastrophic-demo.png b/docs/src/pages/post/_assets/catastrophic-demo.png
new file mode 100644
index 0000000000..7c869fc0e7
Binary files /dev/null and b/docs/src/pages/post/_assets/catastrophic-demo.png differ
diff --git a/docs/src/pages/post/_assets/egpu.jpg b/docs/src/pages/post/_assets/egpu.jpg
new file mode 100644
index 0000000000..9f631d4fd8
Binary files /dev/null and b/docs/src/pages/post/_assets/egpu.jpg differ
diff --git a/docs/src/pages/post/_assets/gradient-decent.gif b/docs/src/pages/post/_assets/gradient-decent.gif
new file mode 100644
index 0000000000..9828f2fe94
Binary files /dev/null and b/docs/src/pages/post/_assets/gradient-decent.gif differ
diff --git a/docs/src/pages/post/_assets/og-4090s.webp b/docs/src/pages/post/_assets/og-4090s.webp
new file mode 100644
index 0000000000..6db1b10b28
Binary files /dev/null and b/docs/src/pages/post/_assets/og-4090s.webp differ
diff --git a/docs/src/pages/post/_assets/openchat-bench-0106.png b/docs/src/pages/post/_assets/openchat-bench-0106.png
new file mode 100644
index 0000000000..9fa37960f1
Binary files /dev/null and b/docs/src/pages/post/_assets/openchat-bench-0106.png differ
diff --git a/docs/src/pages/post/_assets/replay.png b/docs/src/pages/post/_assets/replay.png
new file mode 100644
index 0000000000..8ada6ce84b
Binary files /dev/null and b/docs/src/pages/post/_assets/replay.png differ
diff --git a/docs/src/pages/post/_assets/throughput_Comparison.png b/docs/src/pages/post/_assets/throughput_Comparison.png
new file mode 100644
index 0000000000..6bb63d03ca
Binary files /dev/null and b/docs/src/pages/post/_assets/throughput_Comparison.png differ
diff --git a/docs/src/pages/post/_meta.json b/docs/src/pages/post/_meta.json
new file mode 100644
index 0000000000..e5472dd614
--- /dev/null
+++ b/docs/src/pages/post/_meta.json
@@ -0,0 +1,24 @@
+{
+ "*": {
+ "display": "hidden",
+ "theme": {
+ "breadcrumb": false,
+ "pagination": false
+ }
+ },
+ "all-categories": {
+ "title": "All Categories",
+ "display": "normal",
+ "href": "/blog"
+ },
+ "building-jan": {
+ "title": "Building Jan",
+ "display": "normal",
+ "href": "/blog?category=building-jan"
+ },
+ "research-cat": {
+ "title": "Research",
+ "display": "normal",
+ "href": "/blog?category=research"
+ }
+}
diff --git a/docs/src/pages/post/benchmarking-nvidia-tensorrt-llm.mdx b/docs/src/pages/post/benchmarking-nvidia-tensorrt-llm.mdx
new file mode 100644
index 0000000000..cb384e5539
--- /dev/null
+++ b/docs/src/pages/post/benchmarking-nvidia-tensorrt-llm.mdx
@@ -0,0 +1,319 @@
+---
+title: Benchmarking NVIDIA TensorRT-LLM
+description: This post compares the performance of TensorRT-LLM and llama.cpp on consumer NVIDIA GPUs, highlighting the trade-offs among speed, resource usage, and convenience.
+tags: Nvidia, TensorRT-LLM, llama.cpp, rtx3090, rtx4090, "inference engine"
+categories: research
+ogImage: assets/images/general/og-throughput-benchmark.png
+date: 2024-04-29
+---
+
+import { Callout } from 'nextra/components'
+import CTABlog from '@/components/Blog/CTA'
+
+# Benchmarking NVIDIA TensorRT-LLM
+
+Jan now supports [NVIDIA TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) in addition to [llama.cpp](https://github.com/ggerganov/llama.cpp), making Jan multi-engine and ultra-fast for users with Nvidia GPUs.
+
+We've been excited about TensorRT-LLM for a while, and [had a lot of fun implementing it](https://github.com/janhq/nitro-tensorrt-llm). As part of the process, we've run some benchmarks to see how TensorRT-LLM fares on the consumer hardware (e.g. [4090s](https://www.nvidia.com/en-us/geforce/graphics-cards/40-series/), [3090s](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/)) we commonly see in [Jan's hardware community](https://discord.com/channels/1107178041848909847/1201834752206974996).
+
+
+ **Give it a try!** Jan's [TensorRT-LLM extension](/docs/built-in/tensorrt-llm) is available in Jan v0.4.9 and up ([see more](/docs/built-in/tensorrt-llm)). We precompiled some TensorRT-LLM models for you to try: `Mistral 7b`, `TinyLlama-1.1b`, `TinyJensen-1.1b` 😂
+
+ Bugs or feedback? Let us know on [GitHub](https://github.com/janhq/jan) or via [Discord](https://discord.com/channels/1107178041848909847/1201832734704795688).
+
+
+
+ **An interesting aside:** Jan actually started out in June 2023 building on [NVIDIA FasterTransformer](https://github.com/NVIDIA/FasterTransformer), the precursor library to TensorRT-LLM. TensorRT-LLM was released in September 2023, making it a very young library. We’re excited to see its roadmap develop!
+
+
+## Key Findings
+
+![image](./_assets/throughput_Comparison.png)
+
+TensorRT-LLM was:
+
+- **30-70% faster** than llama.cpp on the same hardware
+- **Consumed less memory on consecutive runs** and used **marginally more GPU VRAM** than llama.cpp
+- **20%+ smaller compiled model sizes** than llama.cpp
+- **Less convenient** as models have to be compiled for a specific OS and GPU architecture, vs. llama.cpp's "Compile once, run everywhere" portability
+- **Less accessible** as it does not support older-generation NVIDIA GPUs
+
+## Why TensorRT-LLM?
+
+[TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is Nvidia's open-source inference library that incorporates Nvidia's proprietary optimizations beyond the open-source [cuBLAS](https://developer.nvidia.com/cublas) library.
+
+As compared to [llama.cpp](https://github.com/ggerganov/llama.cpp), which today dominates Desktop AI as a cross-platform inference engine, TensorRT-LLM is highly optimized for Nvidia GPUs. While llama.cpp compiles models into a [single, generalizable CUDA "backend"](https://github.com/ggerganov/llama.cpp/blob/master/ggml-cuda.cu) that can run on a wide range of Nvidia GPUs, TensorRT-LLM compiles models into a [GPU-specific execution graph](https://www.baseten.co/blog/high-performance-ml-inference-with-nvidia-tensorrt/) that is highly optimized for that specific GPU's Tensor Cores, CUDA cores, VRAM and memory bandwidth.
+
+TensorRT-LLM is typically used on datacenter-grade GPUs, where it produces a [face-melting 10,000 tokens/s](https://nvidia.github.io/TensorRT-LLM/blogs/H100vsA100.html) on [NVIDIA H100 Tensor Core GPUs](https://www.nvidia.com/en-us/data-center/h100/). We were curious to see how TensorRT-LLM performs on consumer-grade GPUs, so we gave it a spin.
+
+| Llama.cpp | TensorRT-LLM |
+| ------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
+| Baseline | Blazing fast (30-70% faster) |
+| Compile once, run cross-platform | Compiled and highly optimized for specific GPU architecture |
+| Generalizable and Portable | Highly-optimized |
+| Model compiles to [single, generalizable CUDA "backend"](https://github.com/ggerganov/llama.cpp/blob/master/ggml-cuda.cu) | Model compiles to [GPU-specific execution graph](https://www.baseten.co/blog/high-performance-ml-inference-with-nvidia-tensorrt/) |
+
+## Experiment Setup
+
+We ran the experiment using standardized inference requests in a sandboxed environment:
+- **Model**: Mistral 7B, compiled and quantized at a comparable `int4` quantization level.
+- **Test runs**: 5 batches of 10 runs each, per inference engine, on a bare metal PC with no other applications.
+- **Parameters**: User defaults, i.e. `batch_size 1`, `input_len 2048` and `output_len 512`
+- **Measurements**:
+ - CPU, memory from Jan system monitor
+  - GPU VRAM utilization metrics from `nvidia-smi`, sampled over an interval of 14 seconds (see the sketch after this list).
+ - Throughput (token/sec) using [Jan's built-in Tokens/sec perf stat](https://github.com/search?q=repo%3Ajanhq%2Fjan%20timeDiffInSeconds&type=code).
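+
+For reference, the kind of `nvidia-smi` sampling used looks roughly like the command below; the query fields and one-second interval are illustrative rather than the exact invocation:
+
+```bash
+# Sample GPU and VRAM utilization once per second during an inference run
+nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total --format=csv -l 1
+```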
+
+
+We picked a [batch size of 1](https://www.reddit.com/r/LocalLLaMA/comments/17sbwo5/what_does_batch_size_mean_in_inference/) to simulate realistic real-world use cases, as Jan users are likely to make one inference request at a time. We also used an `input_len` of 2048 and an `output_len` of 512 to simulate a typical chatbot use case.
+
+
+
+**Further Research**: We found GPU VRAM Utilization to be difficult to measure, with `nvidia-smi` on Windows producing a different result from Windows Task Manager's Resource Monitor for NVIDIA GPUs. After some [research](https://forums.developer.nvidia.com/t/how-to-evaluate-gpu-utilization-usage-on-windows/245451/3), we have gone with `nvidia-smi`'s measurement.
+
+
+### Hardware Selection
+
+We chose the following GPUs based on our users' preferences:
+
+| NVIDIA GPU | VRAM (GB) | CUDA Cores | Tensor Cores | Memory Bus Width (bit) | Memory Bandwidth (GB/s) | Connection (GB/s) |
+| --------------------------------- | --------- | ---------- | ------------ | ---------------------- | ----------------------- | -------------------------------------------- |
+| GeForce RTX 4090 (Ada) | 24 | 16,384 | 512 | 384 | ~1000 | PCIe4.0 x16 (~32) |
+| GeForce RTX 3090 (Ampere) | 24 | 10,496 | 328 | 384 | 935.8 | PCIe4.0 x16 (~32) |
+| GeForce RTX 4070 Laptop GPU (Ada) | 8 | 7680 | 144 | 192 | 272 | PCIe4.0 x4 (~8) |
+| GeForce RTX 4090 eGPU (Ada) | 24 | 16,384 | 512 | 384 | ~1000 | Thunderbolt 3 connected to a USB4 USB-C port ([~1.25-5?](https://www.cablematters.com/Blog/Thunderbolt/usb4-vs-thunderbolt-3)) |
+
+### llama.cpp Setup
+
+- llama.cpp commit [15499eb](https://github.com/ggerganov/llama.cpp/commit/15499eb94227401bdc8875da6eb85c15d37068f7)
+- We used `Mistral-7b-q4_k_m` in `GGUF` with `ngl` at `100`
+
+
+Note: `ngl` stands for the number of model layers offloaded to the GPU, ranging from `0` (no GPU acceleration) up to around `120` (fully on GPU for some large models).
+
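+
+For reference, a llama.cpp run with all layers offloaded looks roughly like this; the binary name, model path, and prompt are placeholders, while `-ngl 100` mirrors the setting above:
+
+```bash
+# Run Mistral 7B Q4_K_M with every layer offloaded to the GPU
+./main -m ./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf \
+  -ngl 100 -c 2048 -n 512 \
+  -p "Summarize what ngl controls in llama.cpp."
+```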
+
+### TensorRT-LLM Setup
+
+- TensorRT-LLM version [0.7.1](https://github.com/NVIDIA/TensorRT-LLM/releases/tag/v0.7.1), built on Windows
+- For TensorRT-LLM, we used `Mistral-7b-int4 AWQ`
+- We ran TensorRT-LLM with `free_gpu_memory_fraction` to test it with the lowest VRAM consumption
+- Note: We picked AWQ for TensorRT-LLM to be a closer comparison to GGUF's Q4.
+
+## Results
+
+### NVIDIA GeForce RTX 4090 GPU
+
+![image](./_assets/4090s.png)
+*Jan is built on this Dual-4090 workstation, which recently got upgraded to a nice case*
+
+![image](./_assets/og-4090s.webp)
+*The original case (or lack thereof) for our Dual-4090 cluster, as posted on [r/localllama](https://www.reddit.com/r/LocalLLaMA/comments/16lxt6a/case_for_dual_4090s/)*
+
+
+ **Hardware Details**
+- CPU: Intel 13th series
+- GPU: NVIDIA GeForce RTX 4090 (Ada - sm 89)
+- RAM: 32GB
+- OS: Windows 11 Pro
+
+**Model Details**
+- llama.cpp model: Mistral 7B v0.2 GGUF Q4_K_M
+- TensorRT-LLM model: Mistral 7B v0.2 AWQ, quantized for single GPU (Ada)
+
+
+For this test, we used Jan's [Dual-4090 workstation](https://www.reddit.com/r/LocalLLaMA/comments/16lxt6a/case_for_dual_4090s/), which our engineers timeshare to build Jan.
+
+The [NVIDIA GeForce RTX 4090](https://www.nvidia.com/en-us/geforce/graphics-cards/40-series/) is the latest top-of-the-line desktop GPU, with an MSRP of $1,599, and uses the Ada architecture. It has a ~1000 GB/s memory bandwidth within VRAM, and a PCIe4 x16 lane (~32 GB/s) between the GPU and the CPU.
+
+
+| Metrics | GGUF (using CPU) | GGUF (using GPU) | TensorRT-LLM | How TensorRT-LLM Compares |
+| ------------------------ | ---------------- | ---------------- | ------------ | ------------------------- |
+| Throughput (token/s) | 14.0 | 100.43 | 170.63 | ✅ 69.89% faster |
+| Max GPU Utilization (%) | N/A | 83.50 | 88.50 | 5.99% more |
+| Max VRAM Utilization (%) | N/A | 64 | 72.1 | 12.66% more |
+| Avg RAM Used (GB) | 0.611 | 7.105 | 4.98 | ✅ 29.88% less |
+| Disk Size (GB) | 4.07 | 4.06 | 3.05 | ✅ 24.88% smaller |
+
+TensorRT-LLM was almost 70% faster than llama.cpp by building the model for the GeForce RTX 4090 GPU’s Ada architecture for optimal graph execution, fully utilizing the 512 Tensor Cores, 16,384 CUDA cores, and 1,000 GB/s of memory bandwidth.
+
+The intuition for why llama.cpp is slower is because it compiles a model into a [single, generalizable CUDA “backend”](https://github.com/ggerganov/llama.cpp/blob/master/ggml-cuda.cu) that can run on many NVIDIA GPUs. Doing so requires llama.cpp to sacrifice all the optimizations that TensorRT-LLM makes with its compilation to a GPU-specific execution graph.
+
+### NVIDIA GeForce RTX 3090 GPU
+
+![image](./_assets/3090s.jpg)
+*Our 3090 Machine, now used by one of our engineers to build Jan*
+
+
+ **Hardware Details**
+- CPU: Intel 13th series
+- GPU: NVIDIA GeForce RTX 3090 (Ampere - sm 86)
+- RAM: 64GB
+- OS: Windows 11 Pro
+
+**Model Details**
+- llama.cpp model: Mistral 7B v0.2 GGUF Q4_K_M
+- TensorRT-LLM model: Mistral 7B v0.2 AWQ, quantized for single GPU (Ampere)
+
+
+The [NVIDIA GeForce RTX 3090](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3090-3090ti/) is a popular desktop GPU and retails for approximately $1,500 (as of April 2024). It uses the NVIDIA Ampere architecture. Compared to its successor, the GeForce RTX 4090, it has 33% fewer CUDA cores (10,496) and Tensor Cores (328) and 7% less memory bandwidth (~930 GB/s).
+
+
+| Metrics | GGUF (using CPU) | GGUF (using GPU) | TensorRT-LLM | How TensorRT-LLM Compares |
+| ------------------------ | ---------------- | ---------------- | ------------ | ------------------------- |
+| Throughput (token/s) | 11.42 | 88.70 | 144.19 | ✅ 62.57% faster |
+| Max GPU Utilization (%) | N/A | 80.40 | 89.10 | 10.82% more |
+| Max VRAM Utilization (%) | N/A | 66.80 | 76.20 | 14.07% more |
+| Avg RAM Used (GB)        | 0.611            | 2.60             | 0.98         | ✅ 62.41% less             |
+| Disk Size (GB) | 4.07 | 4.06 | 3.05 | ✅ 24.88% smaller |
+
+Interestingly, the GeForce RTX 3090 was only 16.6% slower compared with the GeForce RTX 4090. On TPS, TensorRT-LLM outperformed llama.cpp by 62.57%. Curiously, it also used negligible RAM for subsequent inference requests after the initial model warmup.
+
+### NVIDIA GeForce RTX 4070 Laptop GPU
+
+![image](./_assets/4070s.jpg)
+
+
+ **Hardware Details**
+- Laptop: Razer Blade 14
+- CPU: AMD Ryzen™ 9 8945HS, 8C/16T
+- GPU: NVIDIA GeForce RTX 4070 Laptop GPU (Ada - sm 89) on PCIe 4.0 x16 (32 GB/s)
+- RAM: 32GB
+- OS: Windows 11 Pro
+
+**Model Details**
+- llama.cpp model: Mistral 7B v0.2 GGUF `Q4_K_M`
+- TensorRT-LLM model: Mistral 7B v0.2 AWQ, quantized for single GPU (Ada)
+
+
+We also benchmarked an NVIDIA GeForce RTX 4070 Laptop GPU with 8 GB of VRAM, which is a popular configuration among Jan users. Laptop GPUs are less powerful than their desktop counterparts, as they trade raw performance for portability, lower energy consumption, and tighter thermal limits.
+
+| Metrics | GGUF (using CPU) | GGUF (using GPU) | TensorRT-LLM | Difference on GPU |
+| ------------------------ | ---------------- | ---------------- | ------------ | ----------------- |
+| Throughput (token/s) | 11.57 | 39.70 | 51.57 | ✅ 29.9% faster |
+| Max GPU Utilization (%) | N/A | 80.00 | 84.67 | 5.83% more |
+| Max VRAM Utilization (%) | N/A | 72.78 | 81.22 | 11.60% more |
+| Avg RAM Used (GB)        | 4.49             | 4.44             | 1.04         | ✅ 76.55% less    |
+| Disk Size (GB) | 4.07 | 4.06 | 3.05 | ✅ 24.88% smaller |
+
+TensorRT-LLM on the laptop dGPU was 29.9% faster in tokens per second throughput than llama.cpp, but significantly slower than the desktop GPUs.
+
+The intuition for this is fairly simple: the GeForce RTX 4070 Laptop GPU has 53.1% fewer CUDA cores and Tensor Cores (compared to the 4090), and less VRAM (8 GB vs. 24 GB). This reduces the surface area for TensorRT-LLM's GPU-specific optimizations.
+
+The GeForce RTX 4070 Laptop GPU is also ~70% slower than the GeForce RTX 4090 desktop GPU, showing the effect of lower power draw, less VRAM, and thermal constraints on inference speed.
+
+### Laptop with NVIDIA GeForce RTX 4090 eGPU
+
+![image](./_assets/egpu.jpg)
+
+
+ **Hardware Details**
+- Laptop: Razer Blade 14
+- eGPU box: AORUS RTX 4090 GAMING BOX
+- CPU: AMD Ryzen™ 9 8945HS, 8C/16T
+- GPU: NVIDIA GeForce RTX 4090 (Ada - sm 89) on eGPU with Thunderbolt 3 connection
+- RAM: 32GB
+- OS: Windows 11 Pro
+
+**Model Details**
+- llama.cpp model: Mistral 7B v0.2 GGUF `Q4_K_M`
+- TensorRT-LLM model: Mistral 7B v0.2 AWQ, quantized for single GPU (Ada)
+
+
+Our last benchmark was to experiment with an [AORUS RTX 4090 GAMING BOX eGPU](https://www.gigabyte.com/Graphics-Card/GV-N4090IXEB-24GD), connected via a [Thunderbolt 3 port](https://www.gigabyte.com/Graphics-Card/GV-N4090IXEB-24GD) to the [Razer Blade 14's USB4 port](https://www.razer.com/sg-en/gaming-laptops/razer-blade-14). Theoretically, the results should be fairly similar to the GeForce RTX 4090 desktop GPU, as they have identical underlying GPUs, but with very different connection speeds.
+
+We thought it would be interesting to see how TensorRT-LLM handles a 68.4% reduction in communication bandwidth between the CPU and GPU:
+- Thunderbolt 3 connection (1.25-5 GB/s?) for eGPUs
+- PCIe 4.0 x4 (~8 GB/s) for "on device" desktops
+
+
+**Help Needed:** We are actually not 100% sure what the actual Connection Speed is [between Thunderbolt 3 to USB4](https://www.cablematters.com/Blog/Thunderbolt/usb4-vs-thunderbolt-3), as the actual speed seems different from the advertised speed of 5 GB/s. There seem to be a lot of factors involved, including the actual cable itself. We'd love if someone in the community can guide us!
+
+
+Overall, we used mid-to-high-end NVIDIA desktop GPUs for our tests, as TensorRT-LLM’s performance enhancements are most apparent on GPUs with more VRAM. For users with lower-spec machines, llama.cpp is the better choice.
+
+| Metrics | GGUF (using CPU) | GGUF (using GPU) | TensorRT-LLM | Difference on GPU |
+| ------------------------ | ---------------- | ---------------- | ------------ | ----------------- |
+| Throughput (token/s) | 11.56 | 62.22 | 104.95 | ✅ 68.66% faster |
+| Max VRAM Utilization (%) | 0 | 65 | 99 | 52.31% more |
+| RAM Used (GB) | 0.611 | 5.38 | 4.11 | ✅ 23.61% less |
+| Disk Size (GB) | 4.07 | 4.06 | 3.05 | ✅ 24.88% smaller |
+
+The Thunderbolt 3 eGPU had 38.5% lower tokens/s than the PCIe 4.0 x16-connected GPU, but its speedup over llama.cpp was similar, at around 69%.
+
+Interestingly, the VRAM used with the eGPU was variably higher. Our hypothesis is that the slower communication bandwidth results in more VRAM being allocated, as memory is also released more slowly.
+
+## Conclusion
+
+### Token Speed
+
+![image](./_assets/throughput_Comparison.png)
+
+| Throughput (Higher is Better) | TensorRT-LLM | Llama.cpp | % Difference |
+| ---------------------------------- | --------------- | ----------- | ------------- |
+| GeForce RTX 4090 desktop GPU | ✅ 170.63t/s | 100.43t/s | 69.89% faster |
+| GeForce RTX 3090 desktop GPU | ✅ 144.19t/s | 88.70t/s | 62.57% faster |
+| GeForce RTX 4090 eGPU | ✅ 104.95t/s | 62.22t/s | 68.66% faster |
+| GeForce RTX 4070 Laptop GPU | ✅ 51.57t/s | 39.70t/s | 29.90% faster |
+| Laptop AMD Ryzen™ 9 8945HS, 8C/16T | (Not supported) | ✅ 11.57t/s | |
+
+- TensorRT-LLM is up to **70% faster** than llama.cpp on desktop GPUs (e.g. 3090 GPU, 4090 GPUs) while using less RAM & CPU (but more fully utilizing VRAM)
+- TensorRT-LLM is up to **30% faster** on laptop GPUs (e.g. 4070 GPUs) with smaller VRAM
+
+### Max VRAM Utilization
+
+| Max VRAM utilization (%)     | TensorRT-LLM | Llama.cpp | % Difference |
+| ---------------------------- | ------------ | --------- | ------------ |
+| GeForce RTX 4090 desktop GPU | 72.10 | 64.00 | 12.66% more |
+| GeForce RTX 3090 desktop GPU | 76.20 | 66.80 | 14.07% more |
+| GeForce RTX 4070 Laptop GPU  | 81.22        | 72.78     | 11.60% more  |
+| GeForce RTX 4090 eGPU | N/A | N/A | N/A |
+
+- TensorRT-LLM used marginally more VRAM at peak utilization than llama.cpp (up to 14% more), which could have interesting implications for power consumption over time.
+- Note: we used comparable (but not identical) quantizations, and TensorRT-LLM’s `AWQ INT4` is implemented differently from llama.cpp’s `q4_k_m`
+
+### Max RAM Usage
+
+| Max RAM used (GB)            | TensorRT-LLM | Llama.cpp | % Difference     |
+| ---------------------------- | ------------ | --------- | ---------------- |
+| GeForce RTX 4090 desktop GPU | ✅ 4.98 | 7.11 | ✅ 29.88% less |
+| GeForce RTX 3090 desktop GPU | ✅ 0.98 | 2.60 | ✅ 62.41% less |
+| GeForce RTX 4070 Laptop GPU  | ✅ 1.04      | 4.44      | ✅ 76.55% less   |
+| GeForce RTX 4090 eGPU | ✅ 4.11 | 5.38 | ✅ 23.61% less |
+
+TensorRT-LLM uses a lot less peak RAM than llama.cpp on slower connections (PCIe 3.0 or Thunderbolt 3), due to better memory management and more efficient delegation to VRAM. On faster connections, it’s at least equal to llama.cpp.
+
+### Compiled Model Size and Number of Files
+- Contrary to popular belief, TensorRT-LLM prebuilt models turned out not to be that huge
+- Mistral 7B int4 was actually 25% smaller in TensorRT-LLM, at 3.05 GB vs. 4.06 GB
+- Note: These are approximate comparisons, as TensorRT-LLM’s AWQ INT4 is implemented differently from llama.cpp’s q4_k_m
+- The bigger takeaway is that the compiled model sizes are roughly in the same ballpark, while TensorRT-LLM produces roughly 7x as many files as GGUF
+
+| Model size (Lower is better) | TensorRT-LLM AWQ int4 | Llama.cpp GGUF Q4 | % Difference |
+| ---------------------------- | --------------------- | ----------------- | ----------------- |
+| Mistral 7B | ✅ 3.05GB | 4.06GB | ✅ 24.88% smaller |
+
+### Convenience
+- Llama.cpp still wins on cross-platform versatility and convenience of a “compile once, run everywhere” approach
+- TensorRT-LLM still requires compilation to specific OS and architecture, though this could be solved by pre-compiling and publishing models on [Nvidia's NGC Model Catalog](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/collections/codellama)
+
+### Accessibility
+- Llama.cpp unsurprisingly beats TensorRT-LLM in terms of accessibility
+- TensorRT-LLM does not support older NVIDIA GPUs and won’t work well on cards with smaller VRAM (e.g. 2-4 GB)
+
+## Final Notes
+
+Our benchmarking is not perfect. We evaluated over a dozen tools ([llmperf](https://github.com/ray-project/llmperf), [psutil](https://pypi.org/project/psutil/), [gpustat](https://github.com/wookayin/gpustat), native utilities, and more) and found that everyone measures TPS and other common metrics differently. We eventually settled on using our own tools in Jan, which are consistent across inference engines and hardware. As for runtime parameters, we went with default settings, likely representing the typical user experience.
+
+We also did not overclock for this benchmark, as it is not a default setting for most users. But we've measured in our tests that TensorRT-LLM can go even faster with a few tweaks. We see this as an exciting future direction.
+
+
+ **How to Overclock:** We found an additional **15% increase in performance** with TensorRT-LLM by:
+- Enabling [XMP](https://www.intel.com/content/www/us/en/gaming/extreme-memory-profile-xmp.html)
+- Overclocking RAM bus speed in BIOS from `3600` to `5600`.
+
+
+We're also publishing the underlying [raw experimental data](https://drive.google.com/file/d/1rDwd8XD8erKt0EgIKqOBidv8LsCO6lef/view?usp=sharing), and would encourage the community to scrutinize and help us improve.
+
+Special thanks to Asli Sabanci Demiroz, Annamalai Chockalingam, Jordan Dodge from Nvidia, and Georgi Gerganov from llama.cpp for feedback, review and suggestions.
+
+
diff --git a/docs/src/pages/post/bitdefender.mdx b/docs/src/pages/post/bitdefender.mdx
new file mode 100644
index 0000000000..f3a3ed48e0
--- /dev/null
+++ b/docs/src/pages/post/bitdefender.mdx
@@ -0,0 +1,148 @@
+---
+title: 'Bitdefender False Positive Flag'
+description: "10th January 2024, Jan's 0.4.4 Release on Windows triggered Bitdefender to incorrectly flag it as infected with Gen:Variant.Tedy.258323, leading to automatic quarantine warnings on users' computers."
+date: 2024-01-10
+tags: postmortem, bitdefender
+categories: building-jan
+keywords:
+ [
+ postmortem,
+ bitdefender,
+ false positive,
+ antivirus,
+ jan,
+ nitro,
+ incident,
+ incident response,
+ supply chain security,
+ user communication,
+ documentation,
+ antivirus compatibility,
+ cross-platform testing,
+ proactive incident response,
+ user education,
+ lessons learned,
+ ]
+---
+
+import CTABlog from '@/components/Blog/CTA'
+
+# Bitdefender False Positive Flag
+
+Following the recent incident related to Jan version 0.4.4 triggering Bitdefender on Windows with Gen:Variant.Tedy.258323 on January 10, 2024, we wanted to provide a comprehensive postmortem and outline the necessary follow-up actions.
+
+## Incident Overview
+
+### Bug Description
+
+Jan 0.4.4 installation on Windows triggered Bitdefender to flag it as infected with Gen:Variant.Tedy.258323, leading to automatic quarantine.
+
+### Affected Antivirus
+
+- McAfee and Microsoft Defender were unaffected.
+- Bitdefender consistently flagged the issue.
+
+### Incident Timeline
+
+- _10 Jan, 2:18 am SGT:_ Hawke flags up Malware antivirus errors for 0.4.4 installation on Windows computers.
+- _10 Jan, 2:21 am SGT:_ @0xSage responds in Discord.
+- _10 Jan, 2:35 am SGT:_ Hawke confirms multiple people have experienced this error on fresh installs.
+- _10 Jan, 2:41 am SGT:_ @louis-jan and @dan-jan revert 0.4.4 out of an abundance of caution.
+- _Incident ongoing:_ To triage and investigate the next day.
+- _10 Jan, 11:36 am SGT:_ @Hien has investigated all versions of Nitro and conducted scans using Bitdefender. Only the 2 latest versions raised warnings (0.2.7, 0.2.8).
+- _10 Jan, 12:44 pm SGT:_ @Hien tested 0.2.6 again and suggested using it for now; the two remaining Nitro versions (0.2.7, 0.2.8) will undergo further investigation.
+- The team started testing on the fixed build.
+- _10 Jan, 3:22 pm SGT:_ Diagnosis suggests it's most likely a false positive. @Hien has so far only found a workaround by building Nitro Windows CPU on a GitHub-hosted runner, and hasn't identified the root cause yet.
+- _10 Jan, 5:24 pm SGT:_ @Hien is testing two scenarios and still trying to understand the workings of Bitdefender.
+- _11 Jan, 5:46 pm SGT:_ Postmortem meeting
+
+## Investigation Update
+
+- @Hien has investigated all versions of Nitro and conducted scans using Bitdefender; only the two latest versions (0.2.7, 0.2.8) raised warnings. Nitro 0.2.6, the highest version without the issue, was tested again and no longer triggers a warning from Bitdefender.
+- We have observed that Nitro versions up to 0.2.6 remain unaffected. However, Bitdefender flags versions 0.2.7 and 0.2.8 as infected, leading to their deletion. In order to proceed with the current release, Hien suggests downgrading Nitro to version 0.2.6 and conducting tests with this version. Simultaneously, he will investigate why Bitdefender is flagging versions 0.2.7 and 0.2.8.
+- It's essential to note that between versions 0.2.6, 0.2.7, and 0.2.8, only minor changes were made, which should not trigger a malicious code warning. We can refer to the changelog between 0.2.7 and 0.2.8 to pinpoint these changes.
+- Our primary message is to convey that we did not introduce malicious code into Jan (indicating a false positive), and the investigation aims to understand the root cause behind Bitdefender flagging versions 0.2.7 and 0.2.8.
+- The current diagnosis looks like a false positive but it's still under investigation. Reference link: [here](https://stackoverflow.com/questions/75886428/fake-positive-bit-defender-problem-genvariant-tedy-304469), [here](https://stackoverflow.com/questions/58010466/bitdefender-detects-my-console-application-as-genvariant-ursu-56053), and [here](https://www.cisa.gov/sites/default/files/2023-06/mar-10365227.r1.v1.clear_.pdf).
+- @Hien is testing two scenarios and still trying to understand the workings of Bitdefender. Still under investigation: is the issue with the code or the CI?
+ - In Case 1, using the same CI agent for tags 0.2.6 and 0.2.8, after PRs by Alan and myself, Bitdefender flagged the Nitro CPU binary build. Naturally, one would conclude this is due to the code.
+ - However, I proceeded with a further experiment: for the 0.2.8 code, instead of using our CI agent, I used a GitHub hosted agent. This time, Bitdefender did not flag our binary build.
+- We've identified that the Bitdefender warning was not an attack; there is no malicious code.
+- We've isolated the event to originate from a CI agent, which resulted in a BitDefender false positive alert.
+
+## Follow-ups and Action Items
+
+1. **Reproduce Bitdefender Flag in Controlled Environment [Done]:**
+
+ - _Objective:_ To replicate the issue in a controlled environment to understand the triggers and specifics of Bitdefender's detection.
+
+2. **Investigate Malicious Code or False Positive:**
+
+ - _Objective:_ Determine whether the flagged issue is a result of actual malicious code or a false positive. If it's a false positive, work towards resolution while communicating with Bitdefender.
+
+3. **Supply Chain Attack Assessment:**
+
+ - _Objective:_ Evaluate the possibility of a supply chain attack. Investigate whether the Nitro 0.4.4 distribution was compromised or tampered with during the release process.
+
+4. **Testing after the Hotfix:**
+
+ - _Objective:_ In addition to verifying the issue after the fix, it is essential to conduct comprehensive testing across related areas, ensuring compatibility across different operating systems and antivirus software (latest version / free version only).
+
+5. **Process Improvement for Future Releases:**
+
+ - _Objective:_ Identify and implement improvements to our release process to prevent similar incidents in the future. This may include enhanced testing procedures, code analysis, and collaboration with antivirus software providers during the pre-release phase. Additionally, we should add verifying the latest antivirus software in the release checklist.
+
+6. **Documentation of Tested Antivirus Versions:**
+ - _Objective:_ Create a document that outlines the testing conducted, including a matrix that correlates Jan versions with the tested antivirus versions.
+ - _Sample list:_ for consideration purpose
+ - Bitdefender
+ - McAfee
+ - Avira
+ - Kaspersky
+ - Norton
+ - Microsoft defender
+ - AVG
+ - TotalAV
+
+## Next Steps
+
+- The team should follow up on each action item with clear ownership, priority, and deadlines.
+- Communicate progress transparently with the community and clients through appropriate channels. If there are any insights or suggestions, share them in the dedicated channels.
+- Update internal documentation and procedures based on the lessons learned from this incident.
+
+## Lessons Learned
+
+1. **Antivirus Compatibility Awareness:**
+
+ - _Observation:_ The incident underscored the significance of recognizing and testing for antivirus compatibility, particularly with widely-used solutions like Bitdefender.
+ - _Lesson Learned:_ In the future, we will integrate comprehensive checks for compatibility with various antivirus software, including both antivirus and "Malicious Code Detection," into our CI or QA checklist. This proactive measure aims to minimize false positive detections during the release and testing processes.
+
+2. **Cross-Platform Testing:**
+
+ - _Observation:_ The problem did not occur on MacOS and Linux systems, implying a potential oversight in cross-platform testing during our release procedures.
+ - _Lesson Learned:_ Clarification — This observation is not directly related to antivirus testing. Instead, it underscores the necessity to improve our testing protocols, encompassing multiple operating systems. This ensures a thorough evaluation of potential issues on diverse platforms, considering the various antivirus software and differences in architectures on Mac and Linux systems.
+
+3. **User Communication and Documentation:**
+
+ - _Observation:_ Due to the timely response from Nicole, who was still active on Discord and Github at 2 am, this quick response facilitated our ability to assess the impact accurately.
+ - _Lesson Learned:_ While our communication with users was effective in this instance, it was mainly due to Nicole's presence during the incident. To improve our overall response capability, we should prioritize "24/7 rapid triage and response." This involves ensuring continuous availability or establishing a reliable rotation of team members for swift user communication and issue documentation, further enhancing our incident response efficiency.
+
+4. **Proactive Incident Response:**
+
+ - _Observation:_ The incident response, while involving a prompt version rollback, experienced a slight delay due to the release occurring at midnight. This delay postponed the initiation of the investigation until the next working hours.
+ - _Lesson Learned:_ Recognizing the importance of swift incident response, particularly in time-sensitive situations, we acknowledge that releasing updates during off-hours can impact the immediacy of our actions. Moving forward, we will strive to optimize our release schedules to minimize delays and ensure that investigations can commence promptly regardless of the time of day. This may involve considering alternative release windows or implementing automated responses to critical incidents, ensuring a more proactive and timely resolution.
+
+5. **Supply Chain Security Measures:**
+
+ - _Observation:_ While the incident prompted consideration of a potential supply chain attack, it's crucial to emphasize that this was not the case. Nonetheless, the incident underscored the importance of reviewing our supply chain security measures.
+ - _Lesson Learned:_ Going forward, we should strengthen supply chain security by introducing additional verification steps to uphold the integrity of our release process. Collaborating with distribution channels is essential for enhancing security checks and ensuring a robust supply chain.
+ - _Longer-term:_ Exploring options for checking Jan for malicious code and incorporating antivirus as part of our CI/CD pipeline should be considered for a more comprehensive and proactive approach.
+
+6. **User Education on False Positives:**
+ - _Observation:_ Users reported Bitdefender automatically "disinfecting" the flagged Nitro version without allowing any user actions.
+ - _Lesson Learned:_ Educate users about the possibility of false positives and guide them on how to whitelist or report such incidents to their antivirus provider (if possible). Provide clear communication on steps users can take in such situations.
+
+These lessons learned will serve as a foundation for refining our processes and ensuring a more resilient release and incident response framework in the future. Continuous improvement is key to maintaining the reliability and security of our software.
+
+Thank you for your dedication and cooperation in resolving this matter promptly.
+
+
\ No newline at end of file
diff --git a/docs/src/pages/post/data-is-moat.mdx b/docs/src/pages/post/data-is-moat.mdx
new file mode 100644
index 0000000000..ec056599b4
--- /dev/null
+++ b/docs/src/pages/post/data-is-moat.mdx
@@ -0,0 +1,112 @@
+---
+title: "The Invisible Moat around Open-Source LLM"
+description: "Uncover the pivotal role of data ownership in training the next iteration of LLM."
+tags: OpenAI has a moat, Catastrophic forgetting, ChatGPT
+date: 2024-03-25
+unlisted: true
+categories: research
+---
+
+import CTABlog from '@/components/Blog/CTA'
+
+# The Invisible Moat around Open-Source LLM
+
+In the crowded AI landscape, OpenAI's ChatGPT stands out, not just for its capabilities but for its unique access to the pre-trained dataset. This post explores the vital role of data in maintaining a competitive edge, focusing on OpenAI's strategic advantage through data ownership.
+
+## Data: The Secret Weapon
+OpenAI, with ChatGPT, has carved a distinct advantage. By harnessing user interactions, it gains invaluable insights into diverse use cases, enabling precise model refinements. The cornerstone of this advantage lies in the "pre-trained dataset." This treasure trove of data empowers OpenAI to cater to specific needs, ensuring sustained improvement and differentiation.
+
+## The Rise of Open Source
+
+```
+- How they/Mistral/Llama make money?
+-> around having pretrained data -> finetuning
+First para:
+Rise of Open Source LLMs like Mistral, Llama2, Llama3
+People think they don't have a moat = everything is open source
+Second para:
+We actually think these guys have an "invisible moat"
+Pre-training data is not released, and makes a huge difference in fine-tuning efficacy
+```
+
+### Why is pretrained data important?
+
+> *Owning the pre-trained dataset is crucial as it represents the original distribution.*
+
+Access to the pre-trained dataset acts as a master key to address the critical issue of ["Catastrophic forgetting"](https://en.wikipedia.org/wiki/Catastrophic_interference) in Large Language Models (LLMs). This phenomenon describes how LLMs lose hold of prior knowledge upon learning new information. Access to the foundational dataset allows for effective fine-tuning, balancing the introduction of new data with the retention of existing knowledge.
+
+![Catastrophic forgetting](./_assets/catastrophic-demo.png)
+
+**Figure 1.** Demonstrates the catastrophic forgetting issue: without mixing datasets, AI overfits on new tasks, impairing normal communication.
+
+### Illustrating Catastrophic Forgetting
+
+```
+What is fine-tuning
+Process of Finetuning (pretrain, instruct, finetune)
+Fine-tuning datasets
+Risk of catastrophic forgetting
+"Why is Pre-trained data important?"
+What is pre-training dataset
+How does fine-tuning with pre-training dataset differ from when you don't have it
+How does it avoid catastrophic forgetting
+```
+
+Catastrophic forgetting can be visualized as a ball in a multidimensional landscape, where moving towards new knowledge risks losing grasp on the old.
+Pre-trained data acts as a map, guiding fine-tuning in a way that incorporates new information while safeguarding existing knowledge.
+
+![Gradient descent](./_assets/gradient-decent.gif)
+
+**Figure 2.** [Gradient descent demonstration](https://en.wikipedia.org/wiki/Gradient_descent)
+
+### Smoothing Distribution Shifts
+
+As described above, mixing in the pre-trained dataset ensures smoother distribution shifts when introducing new information, as it embodies a comprehensive spectrum of prior knowledge.
+
+This continuity in knowledge transition helps in maintaining the robustness of the model against sudden changes, akin to providing a more gradual learning curve where the new information is incrementally integrated with the existing knowledge base.
+
+This concept is supported by the [EleutherAI's research](https://arxiv.org/abs/2403.08763) highlighting the importance of how tasks are sequenced in the learning process, suggesting that introducing dissimilar tasks early on can expand the network's capacity for new information.
+
+**Table 1.** Final results for English-only 405M parameter models trained with different replay amounts show models with more replay perform better in balancing learning and forgetting (measured as AVG Loss). Notably, just 1% mix with a pre-trained dataset significantly lowers AVG loss, effectively shifting model knowledge from English (the Pile) to German.
+
+![Replay method](./_assets/replay.png)
+
+*Note:* **Replay** is a method that involves combining the training dataset from the pre-trained model with new task datasets.
+
+### Acting as a Noise Mask
+
+The pre-trained data can also serve as a form of "noise masking", similar to techniques used in training [early computer vision models](https://arxiv.org/abs/1911.04252).
+
+This approach introduces a level of ["noise"](https://arxiv.org/abs/2310.05914) during training, which can prevent the model from overfitting to the new dataset. By retaining a mix of original and new data, the model is exposed to a broader range of scenarios, enhancing its generalization capabilities and robustness across tasks.
+
+## Solutions
+
+### Overwhelming approach
+
+Overcoming these challenges requires a balanced approach. One partial method involves inundating the model with extensive, curated data, allowing for comprehensive fine-tuning. While effective, this approach demands significant computational resources, a comprehensive filtering process for low-quality inputs, and an extraordinarily high cost associated with gathering millions of high-quality responses.
+
+In the open-source community, two notable examples of this approach are [OpenChat](https://huggingface.co/openchat/openchat-3.5-0106) and [Hermes-Pro](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B): both fine-tune Mistral as a base model on large datasets collected from top-rated GPT-4 and human responses, demonstrating a distribution shift that enhances model performance.
+
+![Openchat results](./_assets/openchat-bench-0106.png)
+
+**Figure 3.** After fine-tuning with a large number of data samples, the model's performance improved, outperforming ChatGPT and Grok-1 in some benchmarks.
+
+### Fully open-source models
+
+- Example: the Dolma dataset and the OLMo models from AllenAI, which release pre-training data alongside model weights.
+
+
+## Conclusion
+
+The ownership and strategic use of pre-trained data serve as an invisible moat. It not only enables the tackling of complex challenges like catastrophic forgetting but also provides a baseline for continuous, targeted improvements. Fully open pre-training data plus open weights offers a way around this moat, but the cost of producing it remains high.
+
+## Reference
+- [Catastrophic forgetting](https://arxiv.org/abs/2308.08747)
+- [Simple and Scalable Strategies to Continually Pre-train Large Language Models](https://arxiv.org/abs/2403.08763)
+- [Gradient descent](https://en.wikipedia.org/wiki/Gradient_descent)
+- [Neftune](https://arxiv.org/abs/2310.05914)
+- [Self-training with Noisy Student improves ImageNet classification](https://arxiv.org/abs/1911.04252)
+
+
+
\ No newline at end of file
diff --git a/docs/src/pages/post/rag-is-not-enough.mdx b/docs/src/pages/post/rag-is-not-enough.mdx
new file mode 100644
index 0000000000..699954f580
--- /dev/null
+++ b/docs/src/pages/post/rag-is-not-enough.mdx
@@ -0,0 +1,136 @@
+---
+title: "RAG is not enough: Lessons from Beating GPT-3.5 on Specialized Tasks with Mistral 7B"
+description: We present a straightforward approach to customizing small, open-source models using fine-tuning and RAG that outperforms GPT-3.5 for specialized use cases.
+tags: RAG, opensource chatgpt alternative, outperform ChatGPT, Mistral
+date: 2024-03-25
+unlisted: true
+categories: research
+---
+
+import CTABlog from '@/components/Blog/CTA'
+
+# RAG is not enough: Lessons from Beating GPT-3.5 on Specialized Tasks with Mistral 7B
+
+## Abstract
+
+We present a straightforward approach to customizing small, open-source models using fine-tuning and RAG that outperforms GPT-3.5 for specialized use cases. With it, we achieved superior Q&A results on the [technical documentation](https://nitro.jan.ai/docs) of a small [codebase](https://github.com/janhq/nitro).
+
+In short, (1) extending a general foundation model like [Mistral](https://huggingface.co/mistralai/Mistral-7B-v0.1) with strong math and coding capabilities, (2) training it over a high-quality, synthetic dataset generated from the intended corpus, and (3) adding RAG capabilities can lead to significant accuracy improvements.
+
+Problems still arise with catastrophic forgetting in general tasks, commonly observed during specialized domain fine-tuning. In our case, this is likely exacerbated by our lack of access to Mistral’s original training dataset and the various compression techniques used in our approach to keep the model small.
+
+## Selecting a strong foundation model
+
+[Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) outshines both [Meta's Llama-2 7B](https://huggingface.co/meta-llama/Llama-2-7b) and [Google's Gemma 7B](https://huggingface.co/google/gemma-7b) in key benchmarks, making it our choice for a base model. Starting with a strong foundation like Mistral allowed us to achieve greater accuracy in our specialized adaptations.
+
+![image](https://hackmd.io/_uploads/S1TN64kTa.png)
+
+*Figure 1. Mistral 7B excels in benchmarks, ranking among the top foundational models.*
+
+*Note: We are not sponsored by the Mistral team, though lots of folks like to run Mistral locally using [Jan](https://jan.ai/), our desktop client.*
+
+## Cost effectively improving the base model
+
+Our technical use case required excellent math capabilities, an area where Mistral can underperform. Thus, we tested a number of Mistral model variants, from foundation models to finetunes to model merges, to find a stronger base model before proceeding to finetuning.
+
+![image](https://hackmd.io/_uploads/SkYBaVk6a.png)
+
+
+*Figure 2: The merged model, Stealth, doubles the mathematical capabilities of its foundation model while retaining its performance on other tasks.*
+
+We found merging models is quick and cost-effective, enabling fast adjustments based on the result of each iteration.
+
+We ended up with [Stealth 7B v1.1](https://huggingface.co/jan-hq/stealth-v1.1), a [SLERP](https://github.com/Digitous/LLM-SLERP-Merge) merge of Mistral with the following:
+
+- [WizardMath](https://huggingface.co/WizardLM/WizardMath-7B-V1.1) for its math capabilities.
+- [WizardCoder](https://huggingface.co/WizardLM/WizardCoder-Python-7B-V1.0) for its coding capabilities.
+- Our own [Trinity](https://huggingface.co/jan-hq/trinity-v1.2) model for its versatility across general tasks.
+
+This particular combination yielded the best tradeoff across mathematical & technical reasoning while retaining the most pre-merge performance on general tasks.
+
+## **DPO finetuning**
+
+Merging different LLMs can lead to a mixed answering style because each model was originally trained on different types of data.
+
+Thus, we applied Direct Preference Optimization ([DPO](https://arxiv.org/abs/2305.18290)) using [Intel's Orca DPO pairs](https://huggingface.co/datasets/Intel/orca_dpo_pairs) dataset, chosen for its generally helpful answering style and its concentration of math and coding examples.
+
+This approach produced a final model - [Stealth 7B v1.2](https://huggingface.co/jan-hq/stealth-v1.2), aligned to our technical preferences and demonstrating minimal loss.
+
+## **Using our own technical documentation**
+
+With the base model ready, we started on our specific use case.
+
+Jan is an open-source project enjoying strong growth, but at one point we began receiving a new support ticket every minute, which quickly overwhelmed our bootstrapped resources.
+
+So, we directed our efforts toward training a model to answer user questions based on existing technical documentation.
+
+Specifically, we trained it on the [Nitro documentation](https://nitro.jan.ai/docs). For context, Nitro is the default inference engine for Jan. It’s an enterprise-ready server implementation of LlamaCPP, written in C++, with multimodal support, queues, and other production-level server capabilities.
+
+It made an interesting corpus because it was rife with post-2023 technical jargon, edge cases, and poor informational layout.
+
+## Generating training data
+
+The first step was to transform Nitro’s unstructured format into a synthetic Q&A dataset designed for [instruction tuning](https://arxiv.org/pdf/2109.01652.pdf).
+
+The text was split into chunks of 300-token segments with 30-token overlaps. This helped to avoid the [lost-in-the-middle](https://arxiv.org/abs/2307.03172) problem, where an LLM can’t use its context efficiently to answer a given question.
+
+The chunks were then given to GPT-4 with 8k context length to generate 3800 Q&A pairs. The [training dataset](https://huggingface.co/datasets/jan-hq/nitro_binarized_v2) is available on HuggingFace.
+
+## **Training**
+
+Training was done with supervised finetuning (SFT) from the [Hugging Face's alignment-handbook](https://github.com/huggingface/alignment-handbook), per [Huggingface's Zephyr Beta](https://github.com/huggingface/alignment-handbook/tree/main/recipes/zephyr-7b-beta) guidelines.
+
+We used consumer-grade, dual Nvidia RTX 4090s for the training. The end-to-end training took 18 minutes. We found optimal hyperparameters in LoRA for this specific task to be `r = 256` and `alpha = 512`.
+
+This final model can be found [here on Huggingface](https://huggingface.co/jan-hq/nitro-v1.2-e3).
+
+![image](https://hackmd.io/_uploads/SJyDTVk6p.png)
+
+
+*Figure 3. Using the new finetuned model in [Jan](https://jan.ai/)*
+
+## Improving results with RAG
+
+As an additional step, we also added [Retrieval Augmented Generation (RAG)](https://blogs.nvidia.com/blog/what-is-retrieval-augmented-generation/) as an experiment parameter.
+
+A simple RAG setup was done using **[Llamaindex](https://www.llamaindex.ai/)** and the **[bge-en-base-v1.5 embedding](https://huggingface.co/BAAI/bge-base-en-v1.5)** model for efficient documentation retrieval and question-answering. You can find the RAG implementation [here](https://github.com/janhq/open-foundry/blob/main/rag-is-not-enough/rag/nitro_rag.ipynb).
+
+## Benchmarking the Results
+
+We curated a new set of [50 multiple-choice questions](https://github.com/janhq/open-foundry/blob/main/rag-is-not-enough/rag/mcq_nitro.csv) (MCQ) based on the Nitro docs. The questions had varying levels of difficulty and had trick components that challenged the model's ability to discern misleading information.
+
+![image](https://hackmd.io/_uploads/By9vaE1Ta.png)
+
+
+*Figure 4. Comparison between the finetuned model and OpenAI's GPT models*
+
+**Results**
+
+- GPT-3.5 with RAG: 56.7%
+- GPT-4 with RAG: 64.3%
+- Merged 7B Model ([Stealth 7B](https://huggingface.co/jan-hq/stealth-v1.3)) with RAG: 47.7%
+- Finetuned 7B Model (Nitro 7B) with RAG: 57.8%
+
+This indicates that with task-specific training, we can improve an open-source small language model to the level of GPT-3.5 on domain knowledge.
+
+Notably, the finetuned + RAG approach also demonstrated more consistency across benchmarking, as indicated by its lower standard deviation.
+
+## Conclusion
+
+We conclude that this combination of model merging + finetuning + RAG shows promise. This finding is relevant for teams and individuals that need specialized, technical small language models that must run in resource-constrained or highly secure environments, where GPT may not be an option.
+
+Anecdotally, we’ve had some success using this model in practice to onboard new team members to the Nitro codebase.
+
+A full research report with more statistics can be found [here](https://github.com/janhq/open-foundry/blob/main/rag-is-not-enough/README.md).
+
+## References
+
+- [Catastrophic forgetting](https://arxiv.org/abs/2308.08747)
+- [Math specialization](https://arxiv.org/abs/2308.09583)
+- [Code specialization](https://arxiv.org/abs/2306.08568)
+- [Search specialization](https://github.com/SciPhi-AI/agent-search)
+- [Evol Instruct](https://github.com/nlpxucan/WizardLM)
+- [Lost in the middle](https://arxiv.org/abs/2307.03172)
+- [Instruction tuning](https://arxiv.org/pdf/2109.01652.pdf)
+
+
\ No newline at end of file
diff --git a/docs/src/pages/privacy.mdx b/docs/src/pages/privacy.mdx
new file mode 100644
index 0000000000..bb77327e63
--- /dev/null
+++ b/docs/src/pages/privacy.mdx
@@ -0,0 +1,52 @@
+---
+title: Privacy
+---
+
+import { Callout } from 'nextra/components'
+
+# Privacy
+
+Homebrew Computer Company is committed to protecting your privacy and ensuring that your personal information is handled safely and responsibly. This policy outlines how we collect, store, and use your personal information when you use any of our products.
+
+## Data Collection
+
+Jan, Cortex, and all Homebrew Computer Company products do not collect personally identifying information. You can read about [our philosophy](/about#philosophy) here and audit our open-source codebases.
+
+### When you voluntarily provide data
+
+We **do** collect personal information you voluntarily provide us, e.g., when you sign up for our newsletter, join our Discord, or contact us via email.
+
+### Jan
+
+Jan is private by default and works 100% offline on your own computer. Your data (e.g., conversation history, usage logs) is stored locally and never leaves your computer.
+
+
+If you use a Remote AI API (e.g., OpenAI API, Groq API), your data will naturally travel to their servers. They will be subject to the privacy policy of the respective API provider.
+
+
+Jan uses [Umami](https://umami.is/) for analytics, which is a privacy-focused, GDPR-compliant analytics tool that does not track personal information. We use this to get aggregate reports on OS and hardware types and prioritize our engineering roadmap. As per [Umami's Privacy Policy](https://umami.is/privacy), Umami uses the following data points to generate its reports:
+
+- OS and device characteristics
+- IP address
+
+Jan does not get any of this data, and we do not track IP addresses or other identifying information. We are actively looking into more privacy-respecting ways to handle analytics, crash reports, and telemetry and would love to work with the community on this.
+
+### Cortex
+
+Cortex is a library that runs large language models (LLMs) locally on your computer. Cortex does not collect any personal information.
+
+## Data Sharing
+
+We do not share your personal information with third parties except as required by law or as necessary to provide you with the services you have requested.
+
+## Data Security
+
+We take the security of your personal information seriously and have implemented appropriate technical and organizational measures to protect your personal information from unauthorized access, disclosure, or misuse.
+
+## Your Choices
+
+You have the right to access, update, and delete your personal information at any time. If you subscribed to our newsletter, you may also opt-out of receiving marketing communications from us by following the unsubscribe link included in our emails.
+
+## Contact Us
+
+If you have any questions or concerns about our privacy policy, please contact us at hello@jan.ai.
diff --git a/docs/src/pages/support.mdx b/docs/src/pages/support.mdx
new file mode 100644
index 0000000000..ef3c90e7f6
--- /dev/null
+++ b/docs/src/pages/support.mdx
@@ -0,0 +1,10 @@
+---
+title: Support - Jan
+---
+
+# Support
+
+- Bugs & requests: file a GitHub ticket [here](https://github.com/janhq/jan/issues)
+- For discussion: join our Discord [here](https://discord.gg/FTk2MvZwJH)
+- For business inquiries: email hello@jan.ai
+- For jobs: please email hr@jan.ai
\ No newline at end of file
diff --git a/docs/src/styles/changelog.scss b/docs/src/styles/changelog.scss
new file mode 100644
index 0000000000..9ea2b73250
--- /dev/null
+++ b/docs/src/styles/changelog.scss
@@ -0,0 +1,16 @@
+.changelog-markdown {
+ h2 {
+ @apply mt-8 text-2xl font-bold tracking-tight text-slate-900 dark:text-slate-100;
+ }
+
+ ul {
+ @apply mt-6 list-disc first:mt-0 ltr:ml-6 rtl:mr-6;
+ li {
+ @apply my-2;
+ }
+ }
+
+ p {
+ @apply mt-6 leading-7 first:mt-0;
+ }
+}
diff --git a/docs/src/styles/fonts.scss b/docs/src/styles/fonts.scss
new file mode 100644
index 0000000000..c7e7db47c0
--- /dev/null
+++ b/docs/src/styles/fonts.scss
@@ -0,0 +1,46 @@
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@100..900&display=swap');
+
+@font-face {
+ font-family: 'PPEditorialNew';
+ src: url('../../public/assets/fonts/PPEditorialNew-Regular.otf')
+ format('opentype');
+ font-weight: 400;
+}
+
+@font-face {
+ font-family: 'PPEditorialNew';
+ src: url('../../public/assets/fonts/PPEditorialNew-RegularItalic.otf')
+ format('opentype');
+ font-weight: 400;
+ font-style: italic;
+}
+
+@font-face {
+ font-family: 'PPEditorialNew';
+ src: url('../../public/assets/fonts/PPEditorialNew-Ultrabold.otf')
+ format('opentype');
+ font-weight: 700;
+}
+
+@font-face {
+ font-family: 'PPEditorialNew';
+ src: url('../../public/assets/fonts/PPEditorialNew-UltraboldItalic.otf')
+ format('opentype');
+ font-weight: 700;
+ font-style: italic;
+}
+
+@font-face {
+ font-family: 'PPEditorialNew';
+ src: url('../../public/assets/fonts/PPEditorialNew-Ultralight.otf')
+ format('opentype');
+ font-weight: 300;
+}
+
+@font-face {
+ font-family: 'PPEditorialNew';
+ src: url('../../public/assets/fonts/PPEditorialNew-UltralightItalic.otf')
+ format('opentype');
+ font-weight: 300;
+ font-style: italic;
+}
diff --git a/docs/src/styles/general.scss b/docs/src/styles/general.scss
new file mode 100644
index 0000000000..f991e816f8
--- /dev/null
+++ b/docs/src/styles/general.scss
@@ -0,0 +1,84 @@
+@layer base {
+ body {
+ @apply antialiased;
+ font-size: 16px;
+ }
+
+ button {
+ &:focus-visible {
+ box-shadow: none !important;
+ }
+ }
+
+ img {
+ pointer-events: none;
+ }
+
+  // Tweaks for some Code Hike classes
+ .ch-codeblock {
+ box-shadow: none !important;
+ }
+
+ .card-wrapper {
+ border-radius: 16px;
+ background: linear-gradient(180deg, #fafafa 0%, #ededed 100%);
+ box-shadow:
+ 0px 4px 12px 0px rgba(0, 0, 0, 0.12),
+ 0px -1px 1px 0px rgba(0, 0, 0, 0.1) inset,
+ 0px 1px 1px 0px #fff inset;
+ }
+
+ .card-wrapper-dark {
+ border-radius: 12px;
+ background: var(
+ --Linear-dark,
+ linear-gradient(
+ 268deg,
+ rgba(117, 117, 117, 0.38) 0%,
+ rgba(121, 119, 128, 0.33) 100%
+ )
+ );
+ box-shadow:
+ 0px 4px 12px 0px rgba(0, 0, 0, 0.12),
+ 0px -1px 1px 0px rgba(0, 0, 0, 0.1) inset,
+ 0px 0.2px 1px 0px #fff inset;
+ }
+
+  // Tweaks for some Nextra classes
+ .nextra-banner-container {
+ padding-left: 8px !important;
+ background: radial-gradient(
+ 58.83% 95.12% at 62.37% 97.91%,
+ rgba(238, 203, 255, 0.59) 0%,
+ rgba(255, 255, 255, 0) 100%
+ ),
+ linear-gradient(
+ 249deg,
+ rgba(67, 119, 233, 0.8) 93.59%,
+ rgba(194, 226, 255, 0.8) 110.85%
+ ) !important;
+ }
+
+ footer {
+ background-color: transparent !important;
+ }
+
+ .nextra-wrap-container {
+ @apply mx-auto max-w-[90rem] pl-[max(env(safe-area-inset-left),1.5rem)] pr-[max(env(safe-area-inset-right),1.5rem)];
+ }
+
+ nav {
+ justify-content: flex-start;
+ > a {
+ @media (min-width: 768px) {
+ margin-right: 0 !important;
+ }
+ }
+ > a:nth-child(2) {
+ margin-left: 16px;
+ }
+ > a:nth-child(4) {
+ margin-right: auto !important;
+ }
+ }
+}
diff --git a/docs/src/styles/main.scss b/docs/src/styles/main.scss
new file mode 100644
index 0000000000..591b041029
--- /dev/null
+++ b/docs/src/styles/main.scss
@@ -0,0 +1,8 @@
+@import 'tailwindcss/base';
+@import 'tailwindcss/components';
+@import 'tailwindcss/utilities';
+
+@import './general.scss';
+@import './changelog.scss';
+@import './fonts.scss';
+@import './wall-of-love.scss';
diff --git a/docs/src/styles/wall-of-love.scss b/docs/src/styles/wall-of-love.scss
new file mode 100644
index 0000000000..69ef2d82a6
--- /dev/null
+++ b/docs/src/styles/wall-of-love.scss
@@ -0,0 +1,166 @@
+.embla {
+ overflow: hidden;
+}
+.embla__container {
+ display: flex;
+ align-items: flex-start;
+}
+.embla__slide {
+ flex: 0 0 100%;
+}
+
+.react-tweet-theme {
+ --tweet-container-margin: 1.5rem 0;
+ --tweet-header-font-size: 0.9375rem;
+ --tweet-header-line-height: 1.25rem;
+ --tweet-body-font-size: 1.25rem;
+ --tweet-body-font-weight: 400;
+ --tweet-body-line-height: 1.5rem;
+ --tweet-body-margin: 0;
+ --tweet-quoted-container-margin: 0.75rem 0;
+ --tweet-quoted-body-font-size: 0.938rem;
+ --tweet-quoted-body-font-weight: 400;
+ --tweet-quoted-body-line-height: 1.25rem;
+ --tweet-quoted-body-margin: 0.25rem 0 0.75rem 0;
+ --tweet-info-font-size: 0.9375rem;
+ --tweet-info-line-height: 1.25rem;
+ --tweet-actions-font-size: 0.875rem;
+ --tweet-actions-line-height: 1rem;
+ --tweet-actions-font-weight: 700;
+ --tweet-actions-icon-size: 1.25em;
+ --tweet-actions-icon-wrapper-size: calc(
+ var(--tweet-actions-icon-size) + 0.75em
+ );
+ --tweet-replies-font-size: 0.875rem;
+ --tweet-replies-line-height: 1rem;
+ --tweet-replies-font-weight: 700;
+}
+
+.light {
+ .react-tweet-theme {
+ --tweet-skeleton-gradient: linear-gradient(
+ 270deg,
+ #fafafa,
+ #eaeaea,
+ #eaeaea,
+ #fafafa
+ );
+ --tweet-border: 1px solid #cfd9de;
+ --tweet-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto,
+ Helvetica, Arial, sans-serif;
+ --tweet-font-color: #0f1419;
+ --tweet-font-color-secondary: #536471;
+ --tweet-bg-color: #fff;
+ --tweet-bg-color-hover: #f7f9f9;
+ --tweet-quoted-bg-color-hover: rgba(0, 0, 0, 0.03);
+ --tweet-color-blue-primary: #1d9bf0;
+ --tweet-color-blue-primary-hover: #1a8cd8;
+ --tweet-color-blue-secondary: #006fd6;
+ --tweet-color-blue-secondary-hover: rgba(0, 111, 214, 0.1);
+ --tweet-color-red-primary: #f91880;
+ --tweet-color-red-primary-hover: rgba(249, 24, 128, 0.1);
+ --tweet-color-green-primary: #00ba7c;
+ --tweet-color-green-primary-hover: rgba(0, 186, 124, 0.1);
+ --tweet-twitter-icon-color: var(--tweet-font-color);
+ --tweet-verified-old-color: #829aab;
+ --tweet-verified-blue-color: var(--tweet-color-blue-primary);
+ }
+}
+
+.dark {
+ .react-tweet-theme {
+ --tweet-skeleton-gradient: linear-gradient(
+ 270deg,
+ #15202b,
+ #1e2732,
+ #1e2732,
+ #15202b
+ );
+ --tweet-border: 1px solid #425364;
+ --tweet-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto,
+ Helvetica, Arial, sans-serif;
+ --tweet-font-color: #f7f9f9;
+ --tweet-font-color-secondary: #8b98a5;
+ --tweet-bg-color: #333333;
+ --tweet-bg-color-hover: #1e2732;
+ --tweet-quoted-bg-color-hover: hsla(0, 0%, 100%, 0.03);
+ --tweet-color-blue-primary: #1d9bf0;
+ --tweet-color-blue-primary-hover: #1a8cd8;
+ --tweet-color-blue-secondary: #6bc9fb;
+ --tweet-color-blue-secondary-hover: rgba(107, 201, 251, 0.1);
+ --tweet-color-red-primary: #f91880;
+ --tweet-color-red-primary-hover: rgba(249, 24, 128, 0.1);
+ --tweet-color-green-primary: #00ba7c;
+ --tweet-color-green-primary-hover: rgba(0, 186, 124, 0.1);
+ --tweet-twitter-icon-color: var(--tweet-font-color);
+ --tweet-verified-old-color: #829aab;
+ --tweet-verified-blue-color: #fff;
+ }
+}
+
+.tweet-wrapper {
+ font-size: 14px;
+ [class*='tweet-container_root__'] {
+ max-width: none;
+ }
+ [class*='actions_'] {
+ display: none;
+ }
+ [class*='authorFollow_'] {
+ display: none;
+ }
+ [class*='tweet-info_info__'] {
+ display: none;
+ }
+ [class*='tweet-body_root__'] {
+ font-size: 16px;
+ margin-bottom: 10px;
+ }
+ [class*='tweet-media_root__'] {
+ margin-bottom: 10px;
+ }
+ [class*='tweet-header_brand__'] {
+ display: none;
+ }
+}
+
+.embla__controls {
+ margin-bottom: 20px;
+ display: flex;
+ justify-content: center;
+}
+.embla__buttons {
+ display: grid;
+ grid-template-columns: repeat(2, 1fr);
+ gap: 0.6rem;
+ align-items: center;
+}
+.embla__button {
+ -webkit-tap-highlight-color: rgba(var(--text-high-contrast-rgb-value), 0.5);
+ -webkit-appearance: none;
+ appearance: none;
+ background-color: transparent;
+ touch-action: manipulation;
+ display: inline-flex;
+ text-decoration: none;
+ cursor: pointer;
+ padding: 0;
+ margin: 0;
+ box-shadow: inset 0 0 0 0.2rem var(--detail-medium-contrast);
+ width: 3rem;
+ height: 3rem;
+ z-index: 1;
+ border-radius: 24%;
+ color: var(--text-body);
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ @apply border dark:border-gray-600 border-neutral-800;
+}
+.embla__button:disabled {
+ color: var(--detail-high-contrast);
+}
+.embla__button__svg {
+ width: 24%;
+ height: 24%;
+}
diff --git a/docs/src/types/blog.d.ts b/docs/src/types/blog.d.ts
new file mode 100644
index 0000000000..61471c0d40
--- /dev/null
+++ b/docs/src/types/blog.d.ts
@@ -0,0 +1,8 @@
+type BlogPostsThumbnail = {
+ title: string | null
+ url: string | null
+ description: string | null
+ date: string | null
+ tags: string[] | null
+ categories: string[] | null
+}
diff --git a/docs/src/types/release.d.ts b/docs/src/types/release.d.ts
new file mode 100644
index 0000000000..6867aa7dee
--- /dev/null
+++ b/docs/src/types/release.d.ts
@@ -0,0 +1,8 @@
+type Changelog = {
+ title: string
+ description: string
+ date: string
+ ogImage: string
+ version: string
+ url: string
+}
diff --git a/docs/src/utils/format.ts b/docs/src/utils/format.ts
new file mode 100644
index 0000000000..2e8aa428ce
--- /dev/null
+++ b/docs/src/utils/format.ts
@@ -0,0 +1,20 @@
+export function formatCompactNumber(count: number) {
+ const formatter = Intl.NumberFormat('en', { notation: 'compact' })
+ return formatter.format(count)
+}
+
+export const totalDownload = (release: []) => {
+ if (release instanceof Array) {
+ const count = release
+ .map((version: { assets: any[] }) =>
+ version.assets.map((os) => os.download_count)
+ )
+ .map((x: any[]) => x.reduce((a: any, b: any) => a + b, 0))
+ .reduce((a: any, b: any) => a + b, 0)
+
+ return formatCompactNumber(count)
+ } else {
+    // Return a dummy value to avoid hitting the GitHub API rate limit in dev mode
+ return formatCompactNumber(9000000)
+ }
+}
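For reference, a quick usage sketch of the two helpers above (illustrative only; the release objects mirror the `assets[].download_count` fields of the GitHub Releases API that `totalDownload` reads, and the cast works around the loose `[]` parameter type):

```ts
import { formatCompactNumber, totalDownload } from '@/utils/format'

// Intl compact notation: 1,234 -> "1.2K", 9,000,000 -> "9M"
formatCompactNumber(1234) // "1.2K"

// Sums download_count across every asset of every release, then compacts the total
const releases = [
  { assets: [{ download_count: 1200 }, { download_count: 300 }] },
  { assets: [{ download_count: 4500 }] },
]
totalDownload(releases as unknown as []) // "6K"
```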
diff --git a/docs/static/img/linux.png b/docs/static/img/linux.png
new file mode 100644
index 0000000000..754d978f6a
Binary files /dev/null and b/docs/static/img/linux.png differ
diff --git a/docs/static/img/mac.png b/docs/static/img/mac.png
new file mode 100644
index 0000000000..31fa44e6ee
Binary files /dev/null and b/docs/static/img/mac.png differ
diff --git a/docs/static/img/windows.png b/docs/static/img/windows.png
new file mode 100644
index 0000000000..016359c024
Binary files /dev/null and b/docs/static/img/windows.png differ
diff --git a/docs/tailwind.config.ts b/docs/tailwind.config.ts
new file mode 100644
index 0000000000..86f5f6c871
--- /dev/null
+++ b/docs/tailwind.config.ts
@@ -0,0 +1,35 @@
+import type { Config } from 'tailwindcss'
+
+const config: Config = {
+ darkMode: 'selector',
+
+ content: [
+ './src/pages/**/*.{js,ts,jsx,tsx,mdx}',
+ './src/components/**/*.{js,ts,jsx,tsx,mdx}',
+ './src/app/**/*.{js,ts,jsx,tsx,mdx}',
+ './theme.config.tsx',
+ ],
+ theme: {
+ container: {
+ center: true,
+ padding: '16px',
+ },
+ fontFamily: {
+ sans: [
+ 'Inter',
+ '-apple-system',
+ 'BlinkMacSystemFont',
+ 'Segoe UI',
+ 'Roboto',
+ 'Oxygen-Sans',
+      'Ubuntu',
+      'Cantarell',
+ 'Helvetica',
+ 'sans-serif',
+ ],
+ serif: ['PPEditorialNew'],
+ },
+ extend: {},
+ },
+ plugins: [],
+}
+export default config
diff --git a/docs/templates/{{slug}}.mdx.hbs b/docs/templates/{{slug}}.mdx.hbs
new file mode 100644
index 0000000000..ab40fc4340
--- /dev/null
+++ b/docs/templates/{{slug}}.mdx.hbs
@@ -0,0 +1,8 @@
+---
+title: {{titleCase title}}
+description: {{description}}
+categories: {{categories}}
+date: {{date}}
+---
+
+# {{capitalize title}}
\ No newline at end of file
diff --git a/docs/theme.config.tsx b/docs/theme.config.tsx
new file mode 100644
index 0000000000..d0c15a7045
--- /dev/null
+++ b/docs/theme.config.tsx
@@ -0,0 +1,189 @@
+import React, { Fragment } from 'react'
+import { useConfig, DocsThemeConfig } from 'nextra-theme-docs'
+import LogoMark from '@/components/LogoMark'
+import FooterMenu from '@/components/FooterMenu'
+import JSONLD from '@/components/JSONLD'
+import { useRouter } from 'next/router'
+import Link from 'next/link'
+import { LibraryBig, Blocks, BrainCircuit, Computer } from 'lucide-react'
+import { AiOutlineGithub } from 'react-icons/ai'
+import { BiLogoDiscordAlt } from 'react-icons/bi'
+import { RiTwitterXFill } from 'react-icons/ri'
+
+const defaultUrl = 'https://jan.ai'
+const defaultImage = 'https://jan.ai/assets/images/general/og-image.png'
+
+const structuredData = {
+ '@context': 'https://schema.org',
+ '@type': 'Organization',
+ 'name': 'Jan',
+ 'url': `${defaultUrl}`,
+ 'logo': `${defaultImage}`,
+}
+
+const config: DocsThemeConfig = {
+ logo: (
+
+
+ )
+ }
+ return title
+ },
+ defaultMenuCollapseLevel: 1,
+ toggleButton: true,
+ },
+ toc: {
+ backToTop: true,
+ },
+ head: function useHead() {
+ const { title, frontMatter } = useConfig()
+ const titleTemplate = (frontMatter?.title || title) + ' - ' + 'Jan'
+ const { asPath } = useRouter()
+
+ return (
+
+
+
+ {titleTemplate}
+
+
+
+
+
+
+
+ keyword) || [
+ 'Jan',
+ 'Customizable Intelligence, LLM',
+ 'local AI',
+ 'privacy focus',
+ 'free and open source',
+ 'private and offline',
+ 'conversational AI',
+ 'no-subscription fee',
+ 'large language models',
+ 'build in public',
+ 'remote team',
+ 'how we work',
+ ]
+ }
+ />
+
+
+ )
+ },
+ footer: {
+    text: <FooterMenu />,
+ },
+ nextThemes: {
+ defaultTheme: 'light',
+ },
+}
+
+export default config
diff --git a/docs/tsconfig.json b/docs/tsconfig.json
new file mode 100644
index 0000000000..42dcf99560
--- /dev/null
+++ b/docs/tsconfig.json
@@ -0,0 +1,32 @@
+{
+ "compilerOptions": {
+ "lib": ["dom", "dom.iterable", "esnext"],
+ "allowJs": true,
+ "skipLibCheck": true,
+ "strict": true,
+ "noEmit": true,
+ "esModuleInterop": true,
+ "module": "esnext",
+ "moduleResolution": "bundler",
+ "resolveJsonModule": true,
+ "isolatedModules": true,
+ "jsx": "preserve",
+ "incremental": true,
+ "paths": {
+ "@/*": ["./src/*"]
+ },
+ "plugins": [
+ {
+ "name": "next"
+ }
+ ]
+ },
+ "include": [
+ "next-env.d.ts",
+ "**/*.ts",
+ "**/*.tsx",
+ "src/pages/**/*.mdx",
+ ".next/types/**/*.ts"
+ ],
+ "exclude": ["node_modules"]
+}
diff --git a/electron/utils/setup.ts b/electron/utils/setup.ts
index 437e21f977..39b8a41335 100644
--- a/electron/utils/setup.ts
+++ b/electron/utils/setup.ts
@@ -1,4 +1,4 @@
-import { app } from 'electron'
+import { app, screen } from 'electron'
import Store from 'electron-store'
const DEFAULT_WIDTH = 1000
@@ -22,13 +22,42 @@ export const getBounds = async () => {
height: DEFAULT_HEIGHT,
}
- const bounds = await storage.get('windowBounds')
- if (bounds) {
- return bounds as Electron.Rectangle
- } else {
+ const bounds = (await storage.get('windowBounds')) as
+ | Electron.Rectangle
+ | undefined
+
+ // If no bounds are saved, use the defaults
+ if (!bounds) {
storage.set('windowBounds', defaultBounds)
return defaultBounds
}
+
+ // Validate that the bounds are on a valid display
+ const displays = screen.getAllDisplays()
+ const isValid = displays.some((display) => {
+ const { x, y, width, height } = display.bounds
+ return (
+ bounds.x >= x &&
+ bounds.x < x + width &&
+ bounds.y >= y &&
+ bounds.y < y + height
+ )
+ })
+
+  // If the saved position is on a connected display, reuse it; otherwise reset to the primary display with the default size
+ if (isValid) {
+ return bounds
+ } else {
+ const primaryDisplay = screen.getPrimaryDisplay()
+ const resetBounds = {
+ x: primaryDisplay.bounds.x,
+ y: primaryDisplay.bounds.y,
+ width: DEFAULT_WIDTH,
+ height: DEFAULT_HEIGHT,
+ }
+ storage.set('windowBounds', resetBounds)
+ return resetBounds
+ }
}
export const saveBounds = (bounds: Electron.Rectangle | undefined) => {
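The new validation logic boils down to a containment check against the connected displays. A standalone sketch of that predicate (hypothetical helper name, not part of this patch):

```ts
import { screen } from 'electron'

// True when the saved window origin lies inside any connected display,
// mirroring the check performed in getBounds above.
const isOnConnectedDisplay = (bounds: Electron.Rectangle): boolean =>
  screen.getAllDisplays().some(({ bounds: { x, y, width, height } }) => {
    return (
      bounds.x >= x &&
      bounds.x < x + width &&
      bounds.y >= y &&
      bounds.y < y + height
    )
  })
```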
diff --git a/extensions/inference-martian-extension/resources/settings.json b/extensions/inference-martian-extension/resources/settings.json
index 2341ad6cd7..6825099f5e 100644
--- a/extensions/inference-martian-extension/resources/settings.json
+++ b/extensions/inference-martian-extension/resources/settings.json
@@ -14,7 +14,7 @@
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
- "description": "The endpoint to use for chat completions. See the [Martian API documentation](https://docs.withmartian.com/martian-model-router/api-reference/get-chat-completions) for more information.",
+ "description": "The endpoint to use for chat completions. See the [Martian API documentation](https://docs.withmartian.com/martian-model-router/getting-started/quickstart-integrating-martian-into-your-codebase) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://withmartian.com/api/openai/v1/chat/completions",
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 42c31938e8..15ceaf5662 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
{
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
- "version": "1.0.19",
+ "version": "1.0.20",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json b/extensions/inference-nitro-extension/resources/models/llava-13b/model.json
index caca33b7e0..6d94fd2724 100644
--- a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llava-13b/model.json
@@ -12,7 +12,7 @@
"id": "llava-13b",
"object": "model",
"name": "LlaVa 13B Q4",
- "version": "1.1",
+ "version": "1.2",
"description": "LlaVa can bring vision understanding to Jan",
"format": "gguf",
"settings": {
@@ -24,7 +24,8 @@
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
- "max_tokens": 4096
+ "max_tokens": 4096,
+ "stop": [""]
},
"metadata": {
"author": "liuhaotian",
diff --git a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json b/extensions/inference-nitro-extension/resources/models/llava-7b/model.json
index b61ec38c2c..1fdd75247b 100644
--- a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llava-7b/model.json
@@ -12,7 +12,7 @@
"id": "llava-7b",
"object": "model",
"name": "LlaVa 7B",
- "version": "1.1",
+ "version": "1.2",
"description": "LlaVa can bring vision understanding to Jan",
"format": "gguf",
"settings": {
@@ -24,7 +24,8 @@
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
- "max_tokens": 4096
+ "max_tokens": 4096,
+ "stop": [""]
},
"metadata": {
"author": "liuhaotian",
diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json
index 9a406dcf42..3a694e5a02 100644
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@@ -1,7 +1,7 @@
{
"name": "@janhq/model-extension",
"productName": "Model Management",
- "version": "1.0.33",
+ "version": "1.0.34",
"description": "Model Management Extension provides model exploration and seamless downloads",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 6d26d576c7..7e7c12469a 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -411,7 +411,8 @@ export default class JanModelExtension extends ModelExtension {
.toLowerCase()
.includes(JanModelExtension._tensorRtEngineFormat)
)
- })?.length > 0 // TODO: find better way (can use basename to check the file name with source url)
+ // Check if the number of matched files equals the number of sources
+ })?.length >= model.sources.length
)
})
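Put differently, a model now only counts as downloaded when it has at least as many matching engine files as declared sources. A simplified sketch of that rule (assumed shapes; the real code matches on `JanModelExtension._tensorRtEngineFormat` rather than a hard-coded extension):

```ts
type ModelSource = { filename: string; url: string }

// '.engine' stands in here for the TensorRT engine format suffix.
const isFullyDownloaded = (
  files: string[],
  model: { sources: ModelSource[] }
): boolean =>
  files.filter((file) => file.toLowerCase().endsWith('.engine')).length >=
  model.sources.length
```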
diff --git a/joi/src/core/Modal/styles.scss b/joi/src/core/Modal/styles.scss
index fcbf071057..11af9418ae 100644
--- a/joi/src/core/Modal/styles.scss
+++ b/joi/src/core/Modal/styles.scss
@@ -13,7 +13,7 @@ fieldset,
&__content {
color: hsla(var(--modal-fg));
- overflow: hidden;
+ overflow: auto;
background-color: hsla(var(--modal-bg));
border-radius: 8px;
font-size: 14px;
diff --git a/joi/src/core/Tabs/styles.scss b/joi/src/core/Tabs/styles.scss
index ce3df013b2..932b8431af 100644
--- a/joi/src/core/Tabs/styles.scss
+++ b/joi/src/core/Tabs/styles.scss
@@ -35,6 +35,7 @@
flex: 1;
height: 38px;
display: flex;
+ white-space: nowrap;
color: hsla(var(--text-secondary));
align-items: center;
justify-content: center;
diff --git a/web/containers/AutoLink/index.test.tsx b/web/containers/AutoLink/index.test.tsx
new file mode 100644
index 0000000000..9f4610a80c
--- /dev/null
+++ b/web/containers/AutoLink/index.test.tsx
@@ -0,0 +1,43 @@
+import React from 'react'
+import { render, screen } from '@testing-library/react'
+import '@testing-library/jest-dom'
+import AutoLink from './index'
+
+describe('AutoLink Component', () => {
+ it('renders text without links correctly', () => {
+ const text = 'This is a test without links.'
+ render(<AutoLink text={text} />)
+ expect(screen.getByText(text)).toBeInTheDocument()
+ })
+
+ it('renders text with a single link correctly', () => {
+ const text = 'Check this link: https://example.com'
+ render(<AutoLink text={text} />)
+ const link = screen.getByText('https://example.com')
+ expect(link).toBeInTheDocument()
+ expect(link).toHaveAttribute('href', 'https://example.com')
+ expect(link).toHaveAttribute('target', 'blank')
+ })
+
+ it('renders text with multiple links correctly', () => {
+ const text = 'Visit https://example.com and http://test.com'
+ render(<AutoLink text={text} />)
+ const link1 = screen.getByText('https://example.com')
+ const link2 = screen.getByText('http://test.com')
+ expect(link1).toBeInTheDocument()
+ expect(link1).toHaveAttribute('href', 'https://example.com')
+ expect(link1).toHaveAttribute('target', 'blank')
+ expect(link2).toBeInTheDocument()
+ expect(link2).toHaveAttribute('href', 'http://test.com')
+ expect(link2).toHaveAttribute('target', 'blank')
+ })
+
+ it('renders text with a link without protocol correctly', () => {
+ const text = 'Visit example.com for more info.'
+ render(<AutoLink text={text} />)
+ const link = screen.getByText('example.com')
+ expect(link).toBeInTheDocument()
+ expect(link).toHaveAttribute('href', 'http://example.com')
+ expect(link).toHaveAttribute('target', 'blank')
+ })
+})
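For context, a minimal sketch of a component that would satisfy these tests (not the actual `web/containers/AutoLink` implementation): split the text on URL-like tokens, prefix protocol-less hosts with `http://`, and open every link with `target="blank"` as the assertions expect.

```tsx
import React from 'react'

// One capture group so String.split keeps the matched URLs in the output array.
const urlPattern = /((?:https?:\/\/)?[\w-]+(?:\.[\w-]+)+[^\s]*)/

const AutoLink = ({ text }: { text: string }) => (
  <>
    {text.split(urlPattern).map((part, i) =>
      urlPattern.test(part) ? (
        <a
          key={i}
          target="blank"
          href={part.startsWith('http') ? part : `http://${part}`}
        >
          {part}
        </a>
      ) : (
        part
      )
    )}
  </>
)

export default AutoLink
```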
diff --git a/web/containers/BlankState/index.test.tsx b/web/containers/BlankState/index.test.tsx
new file mode 100644
index 0000000000..53cb2ece74
--- /dev/null
+++ b/web/containers/BlankState/index.test.tsx
@@ -0,0 +1,38 @@
+import React from 'react'
+import { render, screen } from '@testing-library/react'
+import '@testing-library/jest-dom'
+import BlankState from './index'
+
+describe('BlankState Component', () => {
+ it('renders title correctly', () => {
+ const title = 'Test Title'
+ render(<BlankState title={title} />)
+ expect(screen.getByText(title)).toBeInTheDocument()
+ })
+
+ it('renders description correctly when provided', () => {
+ const title = 'Test Title'
+ const description = 'Test Description'
+ render(<BlankState title={title} description={description} />)
+ expect(screen.getByText(description)).toBeInTheDocument()
+ })
+
+ it('does not render description when not provided', () => {
+ const title = 'Test Title'
+ render(<BlankState title={title} />)
+ expect(screen.queryByText('Test Description')).not.toBeInTheDocument()
+ })
+
+ it('renders action correctly when provided', () => {
+ const title = 'Test Title'
+ const action = <button>Test Action</button>
+ render(<BlankState title={title} action={action} />)
+ expect(screen.getByText('Test Action')).toBeInTheDocument()
+ })
+
+ it('does not render action when not provided', () => {
+ const title = 'Test Title'
+ render(<BlankState title={title} />)
+ expect(screen.queryByText('Test Action')).not.toBeInTheDocument()
+ })
+})
diff --git a/web/containers/Brand/Logo/Mark.test.tsx b/web/containers/Brand/Logo/Mark.test.tsx
new file mode 100644
index 0000000000..68df134c9a
--- /dev/null
+++ b/web/containers/Brand/Logo/Mark.test.tsx
@@ -0,0 +1,37 @@
+import React from 'react'
+import { render, screen } from '@testing-library/react'
+import '@testing-library/jest-dom'
+import LogoMark from './Mark'
+
+describe('LogoMark Component', () => {
+ it('renders with default width and height', () => {
+ render(<LogoMark />)
+ const image = screen.getByAltText('Jan - Logo')
+ expect(image).toBeInTheDocument()
+ expect(image).toHaveAttribute('width', '24')
+ expect(image).toHaveAttribute('height', '24')
+ })
+
+ it('renders with provided width and height', () => {
+ render(<LogoMark width={48} height={48} />)
+ const image = screen.getByAltText('Jan - Logo')
+ expect(image).toBeInTheDocument()
+ expect(image).toHaveAttribute('width', '48')
+ expect(image).toHaveAttribute('height', '48')
+ })
+
+ it('applies provided className', () => {
+ render(<LogoMark className="custom-class" />)
+ const image = screen.getByAltText('Jan - Logo')
+ expect(image).toBeInTheDocument()
+ expect(image).toHaveClass('custom-class')
+ })
+
+ it('renders with the correct src and alt attributes', () => {
+ render(<LogoMark />)
+ const image = screen.getByAltText('Jan - Logo')
+ expect(image).toBeInTheDocument()
+ expect(image).toHaveAttribute('src', 'icons/app_icon.svg')
+ expect(image).toHaveAttribute('alt', 'Jan - Logo')
+ })
+})
diff --git a/web/containers/CenterPanelContainer/index.test.tsx b/web/containers/CenterPanelContainer/index.test.tsx
new file mode 100644
index 0000000000..9e6fda0073
--- /dev/null
+++ b/web/containers/CenterPanelContainer/index.test.tsx
@@ -0,0 +1,56 @@
+import { render, screen } from '@testing-library/react'
+import { useAtomValue } from 'jotai'
+import CenterPanelContainer from './index'
+import '@testing-library/jest-dom'
+
+// Mock useAtomValue from jotai
+jest.mock('jotai', () => ({
+ ...jest.requireActual('jotai'),
+ useAtomValue: jest.fn(),
+}))
+
+describe('CenterPanelContainer', () => {
+ it('renders with reduceTransparent set to true', () => {
+ // Mock reduceTransparentAtom to be true
+ ;(useAtomValue as jest.Mock).mockReturnValue(true)
+
+ render(
+ <CenterPanelContainer>
+ <div>Test Child</div>
+ </CenterPanelContainer>
+ )
+
+ // Check that the container renders with no border or rounded corners
+ const container = screen.getByText('Test Child').parentElement
+ expect(container).not.toHaveClass('rounded-lg border')
+ })
+
+ it('renders with reduceTransparent set to false', () => {
+ // Mock reduceTransparentAtom to be false
+ ;(useAtomValue as jest.Mock).mockReturnValue(false)
+
+ render(
+ <CenterPanelContainer>
+ <div>Test Child</div>
+ </CenterPanelContainer>
+ )
+
+ // Check that the container renders with border and rounded corners
+ const container = screen.getByText('Test Child').parentElement
+ expect(container).toHaveClass('rounded-lg border')
+ })
+
+ it('renders children correctly', () => {
+ // Mock reduceTransparentAtom to be true for this test
+ ;(useAtomValue as jest.Mock).mockReturnValue(true)
+
+ render(
+ <CenterPanelContainer>
+ <div>Child Content</div>
+ </CenterPanelContainer>
+ )
+
+ // Verify that the child content is rendered
+ expect(screen.getByText('Child Content')).toBeInTheDocument()
+ })
+})
diff --git a/web/containers/CopyInstruction/index.test.tsx b/web/containers/CopyInstruction/index.test.tsx
new file mode 100644
index 0000000000..2f00e4e37c
--- /dev/null
+++ b/web/containers/CopyInstruction/index.test.tsx
@@ -0,0 +1,65 @@
+import { render, screen, fireEvent } from '@testing-library/react'
+import { useAtom } from 'jotai'
+import '@testing-library/jest-dom'
+import CopyOverInstruction from './index'
+
+// Mock the `useAtom` hook from jotai
+jest.mock('jotai', () => ({
+ useAtom: jest.fn(),
+}))
+
+describe('CopyOverInstruction', () => {
+ const setCopyOverInstructionEnabled = jest.fn()
+
+ beforeEach(() => {
+ ;(useAtom as jest.Mock).mockImplementation(() => [
+ false,
+ setCopyOverInstructionEnabled,
+ ])
+ })
+
+ afterEach(() => {
+ jest.clearAllMocks()
+ })
+
+ it('should render the component with the switch in the correct state', () => {
+ render(<CopyOverInstruction />)
+
+ // Assert the text is rendered
+ expect(
+ screen.getByText(/Save instructions for new threads/i)
+ ).toBeInTheDocument()
+
+ // Assert the switch is rendered and in the unchecked state
+ const switchInput = screen.getByRole('checkbox')
+ expect(switchInput).toBeInTheDocument()
+ expect(switchInput).not.toBeChecked()
+ })
+
+ it('should call setCopyOverInstructionEnabled when the switch is toggled', () => {
+ render(<CopyOverInstruction />)
+
+ const switchInput = screen.getByRole('checkbox')
+
+ // Simulate toggling the switch
+ fireEvent.click(switchInput)
+
+ // Assert that the atom setter is called with true when checked
+ expect(setCopyOverInstructionEnabled).toHaveBeenCalledWith(true)
+ })
+
+ it('should reflect the updated state when the atom value changes', () => {
+ // Mock the atom to return true (enabled state)
+ ;(useAtom as jest.Mock).mockImplementation(() => [
+ true,
+ setCopyOverInstructionEnabled,
+ ])
+
+ render(<CopyOverInstruction />)
+
+ const switchInput = screen.getByRole('checkbox')
+
+ // The switch should now be checked
+ expect(switchInput).toBeChecked()
+ })
+})
diff --git a/web/containers/EngineSetting/index.test.tsx b/web/containers/EngineSetting/index.test.tsx
new file mode 100644
index 0000000000..140a36395a
--- /dev/null
+++ b/web/containers/EngineSetting/index.test.tsx
@@ -0,0 +1,115 @@
+import { render } from '@testing-library/react'
+import '@testing-library/jest-dom'
+import EngineSetting from './index'
+import SettingComponentBuilder from '@/containers/ModelSetting/SettingComponent'
+import { SettingComponentProps } from '@janhq/core'
+
+// Mock the SettingComponentBuilder component
+jest.mock('@/containers/ModelSetting/SettingComponent', () =>
+ jest.fn(() => null)
+)
+
+describe('EngineSetting', () => {
+ const mockComponentData: SettingComponentProps[] = [
+ {
+ key: 'setting1',
+ title: 'Setting 1',
+ description: 'This is the first setting.',
+ controllerType: 'input',
+ controllerProps: {
+ placeholder: 'Enter text',
+ value: 'default text',
+ type: 'text',
+ },
+ },
+ {
+ key: 'setting2',
+ title: 'Setting 2',
+ description: 'This is the second setting.',
+ controllerType: 'slider',
+ controllerProps: {
+ min: 0,
+ max: 100,
+ step: 1,
+ value: 50,
+ },
+ },
+ {
+ key: 'setting3',
+ title: 'Setting 3',
+ description: 'This is the third setting.',
+ controllerType: 'checkbox',
+ controllerProps: {
+ value: true,
+ },
+ },
+ ]
+
+ const onValueChangedMock = jest.fn()
+
+ afterEach(() => {
+ jest.clearAllMocks() // Clear mocks after each test
+ })
+
+ it('renders SettingComponentBuilder with the correct props', () => {
+ render(
+
+ )
+
+ // Check that SettingComponentBuilder is called with the correct props
+ expect(SettingComponentBuilder).toHaveBeenCalledWith(
+ {
+ componentProps: mockComponentData,
+ disabled: false,
+ onValueUpdated: onValueChangedMock,
+ },
+ {}
+ )
+ })
+
+ it('renders SettingComponentBuilder with disabled prop', () => {
+ render(<EngineSetting componentData={mockComponentData} onValueChanged={onValueChangedMock} disabled />)
+
+ // Check that SettingComponentBuilder is called with disabled=true
+ expect(SettingComponentBuilder).toHaveBeenCalledWith(
+ {
+ componentProps: mockComponentData,
+ disabled: true,
+ onValueUpdated: onValueChangedMock,
+ },
+ {}
+ )
+ })
+
+ it('calls onValueChanged when the value is updated', () => {
+ // Simulating value update in SettingComponentBuilder
+ ;(SettingComponentBuilder as jest.Mock).mockImplementation(
+ ({ onValueUpdated }) => {
+ // Simulate calling the value update handler
+ onValueUpdated('setting1', 'new value')
+ return null
+ }
+ )
+
+ render(<EngineSetting componentData={mockComponentData} onValueChanged={onValueChangedMock} />)
+
+ // Assert that onValueChanged is called with the correct parameters
+ expect(onValueChangedMock).toHaveBeenCalledWith('setting1', 'new value')
+ })
+})
diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx
new file mode 100644
index 0000000000..dce55b5957
--- /dev/null
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/SystemMonitor.test.tsx
@@ -0,0 +1,124 @@
+import '@testing-library/jest-dom'
+import React from 'react'
+import { render, screen, waitFor } from '@testing-library/react'
+import SystemMonitor from './index'
+import { useAtom, useAtomValue } from 'jotai'
+import {
+ cpuUsageAtom,
+ gpusAtom,
+ totalRamAtom,
+ usedRamAtom,
+} from '@/helpers/atoms/SystemBar.atom'
+import useGetSystemResources from '@/hooks/useGetSystemResources'
+
+// Mock dependencies
+jest.mock('jotai', () => ({
+ useAtomValue: jest.fn(),
+ useSetAtom: jest.fn(),
+ useAtom: jest.fn(),
+ atom: jest.fn(),
+}))
+
+// Mock the hooks and atoms
+jest.mock('@/hooks/useGetSystemResources')
+
+jest.mock('@/hooks/usePath', () => ({
+ usePath: () => ({ onRevealInFinder: jest.fn() }),
+}))
+
+jest.mock('@/helpers/atoms/App.atom', () => ({
+ showSystemMonitorPanelAtom: { init: false },
+}))
+
+jest.mock('@/helpers/atoms/Setting.atom', () => ({
+ reduceTransparentAtom: { init: false },
+}))
+
+jest.mock('@/helpers/atoms/SystemBar.atom', () => ({
+ totalRamAtom: { init: 16000000000 },
+ usedRamAtom: { init: 8000000000 },
+ cpuUsageAtom: { init: 50 },
+ gpusAtom: { init: [] },
+ ramUtilitizedAtom: { init: 50 },
+}))
+
+describe('SystemMonitor', () => {
+ const mockWatch = jest.fn()
+ const mockStopWatching = jest.fn()
+ beforeAll(() => {
+ jest.clearAllMocks()
+ ;(useGetSystemResources as jest.Mock).mockReturnValue({
+ watch: mockWatch,
+ stopWatching: mockStopWatching,
+ })
+ })
+ it('renders without crashing', () => {
+ ;(useAtom as jest.Mock).mockReturnValue([false, jest.fn()])
+ render(<SystemMonitor />)
+ expect(screen.getByText('System Monitor')).toBeInTheDocument()
+ })
+
+ it('renders information on expand', () => {
+ const mockGpusAtom = jest.fn()
+ const mockShowPanel = jest.fn()
+ ;(useAtom as jest.Mock).mockImplementation(mockShowPanel)
+ // Mock Jotai hooks
+ ;(useAtomValue as jest.Mock).mockImplementation((atom) => {
+ switch (atom) {
+ case totalRamAtom:
+ return 16000000000
+ case usedRamAtom:
+ return 8000000000
+ case cpuUsageAtom:
+ return 30
+ case gpusAtom:
+ return mockGpusAtom
+ default:
+ return jest.fn()
+ }
+ })
+ mockGpusAtom.mockImplementation(() => [])
+ mockShowPanel.mockImplementation(() => [true, jest.fn()])
+
+ render(<SystemMonitor />)
+
+ expect(screen.getByText('Running Models')).toBeInTheDocument()
+ expect(screen.getByText('App Log')).toBeInTheDocument()
+ expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument()
+ expect(screen.getByText('30%')).toBeInTheDocument()
+ })
+
+ it('should not request system resources on close', async () => {
+ const mockGpusAtom = jest.fn()
+ const mockShowPanel = jest.fn()
+ ;(useAtom as jest.Mock).mockImplementation(mockShowPanel)
+
+ // Mock Jotai hooks
+ ;(useAtomValue as jest.Mock).mockImplementation((atom) => {
+ switch (atom) {
+ case totalRamAtom:
+ return 16000000000
+ case usedRamAtom:
+ return 8000000000
+ case cpuUsageAtom:
+ return 30
+ case gpusAtom:
+ return mockGpusAtom
+ default:
+ return jest.fn()
+ }
+ })
+ mockGpusAtom.mockImplementation(() => [])
+ mockShowPanel.mockImplementation(() => [true, jest.fn()])
+
+ await waitFor(async () => {
+ await render(<SystemMonitor />)
+
+ const toggle = screen.getByTestId('system-monitoring')
+ toggle.click()
+ })
+
+ expect(mockWatch).not.toHaveBeenCalled()
+ expect(mockStopWatching).toHaveBeenCalled()
+ })
+})
diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
index a69e34d57e..7fdc598ec0 100644
--- a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
@@ -1,4 +1,4 @@
-import { Fragment, useEffect, useState } from 'react'
+import { Fragment, useCallback, useState } from 'react'
import { Progress } from '@janhq/joi'
import { useClickOutside } from '@janhq/joi'
@@ -51,35 +51,39 @@ const SystemMonitor = () => {
const reduceTransparent = useAtomValue(reduceTransparentAtom)
const { watch, stopWatching } = useGetSystemResources()
+
useClickOutside(
() => {
- setShowSystemMonitorPanel(false)
+ toggleShowSystemMonitorPanel(false)
setShowFullScreen(false)
},
null,
[control, elementExpand]
)
- useEffect(() => {
- // Watch for resource update
- watch()
-
- return () => {
- stopWatching()
- }
- // eslint-disable-next-line react-hooks/exhaustive-deps
- }, [])
+ const toggleShowSystemMonitorPanel = useCallback(
+ (isShow: boolean) => {
+ setShowSystemMonitorPanel(isShow)
+ if (isShow) {
+ watch()
+ } else {
+ stopWatching()
+ }
+ },
+ [setShowSystemMonitorPanel, stopWatching, watch]
+ )
return (
+
+ )
+
+ // Check if the child content is rendered
+ expect(getByText('Child Content')).toBeInTheDocument()
+ })
+
+ it('hides left and right panels on small screens', () => {
+ // Simulate mobile view
+ window.matchMedia = jest.fn().mockImplementation((query) => ({
+ matches: true, // Change to true to simulate mobile
+ addListener: jest.fn(),
+ removeListener: jest.fn(),
+ }))
+
+ render(
+
+
Child Content
+
+ )
+
+ // Check that the left and right panel states were updated to false
+ expect(mockSetShowLeftPanel).toHaveBeenCalledWith(false)
+ expect(mockSetShowRightPanel).toHaveBeenCalledWith(false)
+ })
+
+ it('restores the last known panel states on larger screens', () => {
+ // Simulate mobile view first
+ window.matchMedia = jest.fn().mockImplementation((query) => ({
+ matches: true, // Change to true to simulate mobile
+ addListener: jest.fn(),
+ removeListener: jest.fn(),
+ }))
+
+ render(
+
+
Child Content
+
+ )
+
+ // Change back to desktop view
+ window.matchMedia = jest.fn().mockImplementation((query) => ({
+ matches: false, // Change to false to simulate desktop
+ addListener: jest.fn(),
+ removeListener: jest.fn(),
+ }))
+
+ // Call the effect manually to simulate the component re-rendering
+ const rerender = render(
+
+
Child Content
+
+ )
+
+ // Check that the last known states were restored (which were true initially)
+ expect(mockSetShowLeftPanel).toHaveBeenCalledWith(true)
+ expect(mockSetShowRightPanel).toHaveBeenCalledWith(true)
+ })
+})
diff --git a/web/containers/Providers/Theme.test.tsx b/web/containers/Providers/Theme.test.tsx
new file mode 100644
index 0000000000..552bbecbe5
--- /dev/null
+++ b/web/containers/Providers/Theme.test.tsx
@@ -0,0 +1,24 @@
+import '@testing-library/jest-dom'
+import React from 'react'
+import { render } from '@testing-library/react'
+import ThemeWrapper from './Theme'
+
+// Mock the ThemeProvider from next-themes
+jest.mock('next-themes', () => ({
+ ThemeProvider: ({ children }: { children: React.ReactNode }) => (
+