diff --git a/crates/edgen_core/src/llm.rs b/crates/edgen_core/src/llm.rs
index 3be07d4..61ee192 100644
--- a/crates/edgen_core/src/llm.rs
+++ b/crates/edgen_core/src/llm.rs
@@ -49,6 +49,7 @@ pub struct CompletionArgs {
     pub one_shot: bool,
     pub seed: Option<u32>,
     pub frequency_penalty: f32,
+    pub context_hint: Option<u32>,
 }
 
 impl Default for CompletionArgs {
@@ -58,6 +59,7 @@ impl Default for CompletionArgs {
             one_shot: false,
             seed: None,
             frequency_penalty: 0.0,
+            context_hint: None,
         }
     }
 }
diff --git a/crates/edgen_rt_llama_cpp/src/lib.rs b/crates/edgen_rt_llama_cpp/src/lib.rs
index 64effbd..a4ff332 100644
--- a/crates/edgen_rt_llama_cpp/src/lib.rs
+++ b/crates/edgen_rt_llama_cpp/src/lib.rs
@@ -247,7 +247,7 @@ impl UnloadingModel {
             //params.seed = args.seed;
             params.n_threads = threads;
             params.n_threads_batch = threads;
-            params.n_ctx = CONTEXT_SIZE;
+            params.n_ctx = args.context_hint.unwrap_or(CONTEXT_SIZE);
 
             let mut session = model_guard
                 .create_session(params)
@@ -306,7 +306,7 @@ impl UnloadingModel {
             //params.seed = args.seed;
             params.n_threads = threads;
             params.n_threads_batch = threads;
-            params.n_ctx = CONTEXT_SIZE;
+            params.n_ctx = args.context_hint.unwrap_or(CONTEXT_SIZE);
 
             let session = model_guard
                 .create_session(params)
diff --git a/crates/edgen_server/src/openai_shim.rs b/crates/edgen_server/src/openai_shim.rs
index c8a6ec7..ae580ce 100644
--- a/crates/edgen_server/src/openai_shim.rs
+++ b/crates/edgen_server/src/openai_shim.rs
@@ -367,6 +367,13 @@ pub struct CreateChatCompletionRequest<'a> {
     /// Indicate if this is an isolated request, with no associated past or future context. This may allow for
     /// optimisations in some implementations. Default: `false`
     pub one_shot: Option<bool>,
+
+    /// A hint for the expected size of this request's context. If unset, the backend chooses the size.
+    ///
+    /// # Warning
+    /// An unsound hint may severely degrade performance and/or inference quality, and in some cases even cause Edgen
+    /// to crash. Do not set this value unless you know what you are doing.
+    pub context_hint: Option<u32>,
 }
 
 /// A message in a chat completion.
@@ -633,6 +640,7 @@ pub async fn chat_completions(
     let mut args = CompletionArgs {
         prompt: untokenized_context,
         seed: req.seed,
+        context_hint: req.context_hint,
         ..Default::default()
     };