
Commit 7bbc7fe

Merge branch 'main' into feat/use-llama-cpp-server
vansangpfiev authored Jan 7, 2025
2 parents: 1903170 + 44412ee
Showing 3 changed files with 3 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
@@ -148,3 +148,4 @@ Table of parameters
 |`flash_attn` | Boolean| To enable Flash Attention, default is true|
 |`cache_type` | String| KV cache type: f16, q8_0, q4_0, default is f16|
 |`use_mmap` | Boolean| To enable mmap, default is true|
+|`ctx_shift` | Boolean| To enable context shift, default is true|
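
For context, a load-model request body that exercises these options might look like the sketch below. Only the field names and defaults come from the table and the diff; the values are illustrative, and a real request would also carry model-specific fields that are omitted here.

    {
      "flash_attn": true,
      "cache_type": "f16",
      "use_mmap": true,
      "ctx_shift": false,
      "ngl": 300
    }

Sending `"ctx_shift": false` would presumably opt out of llama.cpp's context shifting, which otherwise evicts the oldest tokens to make room once the context window fills.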
1 change: 1 addition & 0 deletions src/llama_engine.cc
@@ -712,6 +712,7 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
     }
   }
 
+  params.ctx_shift = json_body->get("ctx_shift", true).asBool();
   params.n_gpu_layers =
       json_body->get("ngl", 300)
           .asInt();  // change from 100 -> 300 since llama 3.1 has 292 gpu layers
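
The new line follows the same jsoncpp pattern as the surrounding options: `Json::Value::get(key, default)` falls back to the default when the key is absent, so existing clients that never send `ctx_shift` keep the old behavior. Below is a minimal standalone sketch of that pattern; the `Params` struct is a hypothetical stand-in for the engine's real parameter struct, not code from this repository.

    #include <json/json.h>  // jsoncpp
    #include <iostream>
    #include <sstream>

    // Hypothetical stand-in for the engine's parameter struct.
    struct Params {
      bool ctx_shift = true;
      int n_gpu_layers = 300;
    };

    int main() {
      // A request body like the one LoadModelImpl receives; "ctx_shift"
      // and "ngl" are the keys used in the diff above.
      std::istringstream body(R"({"ctx_shift": false, "ngl": 40})");

      Json::Value json_body;
      body >> json_body;

      Params params;
      // get(key, default) returns the default when the key is missing,
      // so both fields stay optional for callers.
      params.ctx_shift = json_body.get("ctx_shift", true).asBool();
      params.n_gpu_layers = json_body.get("ngl", 300).asInt();

      std::cout << "ctx_shift=" << params.ctx_shift
                << " ngl=" << params.n_gpu_layers << "\n";
    }

Compiled against jsoncpp, this prints `ctx_shift=0 ngl=40`; dropping either key from the input falls back to the defaults seen in the diff.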
