Back to roadmaps openai Course

Implementing Streaming Responses in Next.js

Waiting for a long AI completion to finish before showing anything can feel sluggish. With streaming enabled, the model returns tokens as soon as they are ready, and the UI can render them incrementally for a responsive, typewriter-style effect.


1. How Chat Streaming Works

graph TD
    A[Client sends chat request] --> B[Server calls OpenAI API with stream enabled]
    B --> C[OpenAI pushes token chunks via HTTP SSE]
    C --> D[Server streams chunks down to client browser]
    D --> E[Frontend renders characters incrementally]

2. Implementing Streaming in Route Handlers

Create a Next.js Route Handler that returns the completion as a streamed response:

// app/api/chat/route.ts
import { NextResponse } from "next/server";
import { openai } from "../../../lib/openai";

/**
 * Handles `POST /api/chat`: forwards the chat history to the OpenAI Chat
 * Completions API with streaming enabled and relays the generated text to the
 * caller as it arrives.
 *
 * @param req - Incoming request whose JSON body has the shape
 *   `{ messages: [...] }`, where `messages` is the chat history to send.
 * @returns On success, a chunked UTF-8 plain-text response containing the
 *   assistant's reply. Returns a JSON `{ error }` body with status 400 when
 *   the request body is malformed, or status 500 when the OpenAI call cannot
 *   be started.
 */
export async function POST(req: Request) {
  let messages;
  try {
    const body = await req.json();
    messages = body?.messages;
  } catch {
    return NextResponse.json(
      { error: "Request body must be valid JSON." },
      { status: 400 },
    );
  }

  if (!Array.isArray(messages) || messages.length === 0) {
    return NextResponse.json(
      { error: "`messages` must be a non-empty array." },
      { status: 400 },
    );
  }

  try {
    // Tie the upstream call to the incoming request so that an aborted
    // request also aborts the OpenAI call.
    const completion = await openai.chat.completions.create(
      {
        model: "gpt-4o-mini",
        messages,
        stream: true,
      },
      { signal: req.signal },
    );

    const encoder = new TextEncoder();
    const chunks = completion[Symbol.asyncIterator]();

    const stream = new ReadableStream<Uint8Array>({
      // `pull` is only invoked when the consumer wants more data, so a slow
      // client applies back-pressure instead of growing an unbounded queue.
      async pull(controller) {
        // Keep reading until something is enqueued or the stream ends:
        // `pull` is not re-invoked automatically if it resolves without
        // enqueuing, and some chunks (role/finish markers) carry no text.
        for (;;) {
          const { done, value } = await chunks.next();
          if (done) {
            controller.close();
            return;
          }

          const content = value.choices[0]?.delta?.content;
          if (content) {
            controller.enqueue(encoder.encode(content));
            return;
          }
        }
      },
      // Runs when the consumer cancels the stream; stop the upstream
      // generation rather than letting it continue unread.
      cancel() {
        completion.controller.abort();
      },
    });

    return new Response(stream, {
      headers: {
        // The body is raw text, not SSE-framed `data:` events, so it is
        // labelled as plain text rather than `text/event-stream`.
        "Content-Type": "text/plain; charset=utf-8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
      },
    });
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : "Unexpected error";
    return NextResponse.json({ error: message }, { status: 500 });
  }
}

3. Consuming Streaming Responses in Frontend

Use the Fetch API to read the response stream inside a React Client Component:

// app/chat/ChatWindow.tsx
"use client";

import React, { useState } from "react";

/**
 * Reads a byte stream to completion, decoding it as UTF-8 and handing each
 * decoded piece of text to `onText` as soon as it is available.
 *
 * @param body - The byte stream to consume (e.g. `response.body`).
 * @param onText - Called with every non-empty decoded fragment, in order.
 */
async function readTextStream(
  body: ReadableStream<Uint8Array>,
  onText: (text: string) => void,
): Promise<void> {
  const reader = body.getReader();
  const decoder = new TextDecoder();

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // `stream: true` makes the decoder hold back an incomplete multi-byte
      // sequence until the next chunk, so characters that straddle a chunk
      // boundary are not turned into replacement characters.
      const text = decoder.decode(value, { stream: true });
      if (text) onText(text);
    }

    // Flush whatever the decoder is still buffering.
    const tail = decoder.decode();
    if (tail) onText(tail);
  } finally {
    reader.releaseLock();
  }
}

/**
 * Minimal chat UI: sends the textarea content to `/api/chat` as a single user
 * message and renders the streamed reply incrementally.
 */
export default function ChatWindow() {
  const [input, setInput] = useState("");
  const [output, setOutput] = useState("");
  const [isStreaming, setIsStreaming] = useState(false);

  async function handleSend() {
    // Ignore empty prompts, and ignore clicks while a reply is still
    // streaming: two concurrent streams would interleave their text.
    if (isStreaming || input.trim() === "") return;

    setOutput("");
    setIsStreaming(true);

    try {
      const response = await fetch("/api/chat", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          messages: [{ role: "user", content: input }],
        }),
      });

      // Error responses carry a JSON error payload, not model output, so they
      // must not be rendered as if they were the answer.
      if (!response.ok) {
        setOutput(`Error: the server responded with status ${response.status}.`);
        return;
      }

      if (!response.body) return;

      // Concatenate tokens incrementally as they arrive.
      await readTextStream(response.body, (text) =>
        setOutput((prev) => prev + text),
      );
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : "Unknown error";
      // Keep whatever was already streamed and append the failure notice.
      setOutput((prev) => `${prev}${prev ? "\n\n" : ""}Error: ${message}`);
    } finally {
      setIsStreaming(false);
    }
  }

  return (
    <div className="p-8 max-w-xl mx-auto">
      <textarea
        value={input}
        onChange={(e) => setInput(e.target.value)}
        className="w-full border p-3 rounded-lg"
        placeholder="Ask a question..."
      />
      <button
        onClick={() => void handleSend()}
        disabled={isStreaming}
        className="bg-blue-600 text-white px-4 py-2 rounded-lg mt-2 disabled:opacity-50"
      >
        Submit
      </button>
      <div className="mt-6 p-4 bg-gray-50 rounded-lg whitespace-pre-wrap min-h-24">
        {output || "AI response will stream here..."}
      </div>
    </div>
  );
}
Published on Last updated: