OpenAI Chat Completions

The POST /v1/chat/completions endpoint supports both streaming (SSE) and non-streaming JSON responses, including text content and tool calls. This is the most commonly used endpoint.

Endpoint

| Method | Path                    | Format                                           |
| ------ | ----------------------- | ------------------------------------------------ |
| POST   | /v1/chat/completions    | SSE (`stream: true`) or JSON (`stream: false`)   |

Unit Test: Text Response

Using the programmatic API with vitest, register a fixture and assert on the response.

text-response.test.ts ts
import { LLMock } from "@copilotkit/llmock";
import { describe, it, expect, beforeAll, afterAll } from "vitest";

// One mock server shared by every test in this file.
let mock: LLMock;

// Boot the mock once before the suite, and tear it down afterwards.
beforeAll(async () => {
  mock = new LLMock();
  await mock.start();
});

afterAll(async () => {
  await mock.stop();
});

it("non-streaming text response", async () => {
  // Register a fixture: a user message of "hello" yields this reply.
  mock.on({ userMessage: "hello" }, { content: "Hello! How can I help?" });

  const requestBody = {
    model: "gpt-4",
    messages: [{ role: "user", content: "hello" }],
    stream: false,
  };
  const response = await fetch(`${mock.url}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  });

  // The mock mirrors the OpenAI ChatCompletion response shape.
  const payload = await response.json();
  expect(payload.choices[0].message.content).toBe("Hello! How can I help?");
  expect(payload.object).toBe("chat.completion");
  expect(payload.id).toMatch(/^chatcmpl-/);
});

Unit Test: Tool Calls

tool-calls.test.ts ts
// Streaming tool-call fixture: the mock replies with an SSE stream that
// carries the registered tool call and ends with the [DONE] sentinel.
// NOTE(review): the fixture matches on "weather" while the request sends
// "what is the weather?" — presumably fixture matching is by substring;
// confirm against the matcher implementation before relying on it.
it("returns tool call in streaming mode", async () => {
  mock.on(
    { userMessage: "weather" },
    { toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }] }
  );

  const res = await fetch(`${mock.url}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "gpt-4",
      messages: [{ role: "user", content: "what is the weather?" }],
      stream: true,
    }),
  });

  // Coarse assertions on the raw SSE text: the tool name appears in some
  // chunk, and the stream is terminated by the [DONE] sentinel.
  const text = await res.text();
  expect(text).toContain("get_weather");
  expect(text).toContain("data: [DONE]");
});

Integration Test: Streaming SSE

streaming-integration.test.ts ts
import { createServer, type ServerInstance } from "@copilotkit/llmock/server";

// Stand up a dedicated mock server on an ephemeral port (port: 0), streaming
// the fixture content in 10-character chunks.
const instance = await createServer(
  [{ match: { userMessage: "hello" }, response: { content: "Hello! How can I help?" } }],
  { port: 0, chunkSize: 10 }
);

const res = await httpPost(`${instance.url}/v1/chat/completions`, {
  model: "gpt-4",
  messages: [{ role: "user", content: "hello" }],
  stream: true,
});

// Decode the SSE body: events are separated by blank lines; skip the
// terminal [DONE] sentinel and parse the JSON after the "data: " prefix.
const events = res.body.split("\n\n");
const chunks = [];
for (const event of events) {
  if (!event.startsWith("data: ") || event.includes("[DONE]")) continue;
  chunks.push(JSON.parse(event.slice("data: ".length)));
}

// The opening chunk carries the assistant role in its delta.
expect(chunks[0].choices[0].delta.role).toBe("assistant");

// Concatenating every delta's content reproduces the fixture text.
let content = "";
for (const chunk of chunks) {
  content += chunk.choices[0].delta.content ?? "";
}
expect(content).toBe("Hello! How can I help?");

// The final chunk reports why generation stopped.
expect(chunks.at(-1).choices[0].finish_reason).toBe("stop");

JSON Fixture

fixtures/chat.json json
{
  "fixtures": [
    {
      "match": { "userMessage": "hello" },
      "response": { "content": "Hello! How can I help?" }
    },
    {
      "match": { "userMessage": "weather" },
      "response": {
        "toolCalls": [{
          "name": "get_weather",
          "arguments": "{\"city\":\"SF\"}"
        }]
      }
    }
  ]
}

Response Format

Non-streaming (stream: false)

Returns a single JSON object matching the OpenAI ChatCompletion type, including an `id` prefixed with `chatcmpl-`, `object: "chat.completion"`, and a `choices` array whose entries carry a `message`.

Streaming (stream: true)

Returns text/event-stream with data: {json}\n\n lines, ending with data: [DONE]\n\n. Each chunk matches the OpenAI ChatCompletionChunk type with delta instead of message.