Calculate OpenAI usage for Chat Completion API stream in NodeJS
Introduction
OpenAI includes a usage field in its non-stream responses. For example, with a non-stream request like this (stream: false):
{
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "say hello world"
    }
  ],
  "temperature": 0.7,
  "stream": false
}
We get a response that reports the token usage:
{
  "id": "chatcmpl-84hml7McTvfEihzl7S0Osb1BbfshR",
  "object": "chat.completion",
  "created": 1696132315,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Hello, world!"
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 4,
    "total_tokens": 14
  }
}
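For reference, reading the usage in the non-stream case is just a matter of parsing the body. A minimal sketch, assuming Node 18+ so the built-in fetch is available:

(async () => {
  // Non-stream request: the usage field comes back in the response body.
  const res = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: "gpt-3.5-turbo",
      messages: [{ role: "user", content: "say hello world" }],
      stream: false,
    }),
  });
  const data = await res.json();
  // e.g. { prompt_tokens: 10, completion_tokens: 4, total_tokens: 14 }
  console.log(data.usage);
})();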
However, when we request a stream response (stream: true), the response arrives as a series of chunks with no usage field:
{
  "id": "chatcmpl-84hoe6uo51KhIEjgkNHkhk58xEUcB",
  "object": "chat.completion.chunk",
  "created": 1696132432,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "delta": {
        "content": " world"
      },
      "finish_reason": null
    }
  ]
}
In this case, we need to calculate the token usage ourselves: once for the request prompt, and once for the OpenAI response. We will do it with the js-tiktoken library.
Set up the necessary components
We will set up a basic NodeJS application that makes a stream request to OpenAI and uses js-tiktoken to calculate the token usage.
Here is the basic setup:
mkdir openai-usage
cd openai-usage
touch index.js
The https module ships with NodeJS, so there is nothing to install yet.
Write this code to the index.js
file:
const https = require("https");

const options = {
  hostname: "api.openai.com",
  path: "/v1/chat/completions",
  port: 443,
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
  },
};

const payload = {
  model: "gpt-3.5-turbo",
  messages: [
    {
      role: "user",
      content: "say hello world",
    },
  ],
  temperature: 0.7,
  stream: true,
};

const request = https.request(options, (res) => {
  res.on("data", (chunk) => {
    console.log(chunk.toString());
  });
  res.on("end", () => {
    console.log("done");
  });
});

request.write(JSON.stringify(payload));
request.end();
In this setup, we use the built-in https module to make the request to the OpenAI API. The connection and header information goes in options, and the request body goes in payload. Note that we set stream: true in the payload, and we print each chunk of data we get from the OpenAI stream response.
Run this command in the project root folder (replace sk-your-open-ai-key with your OpenAI key):
OPENAI_API_KEY=sk-your-open-ai-key node index.js
We will see the response stream printed in the console as below:
# console.log(chunk.toString());
data: {"id":"chatcmpl-84iSdjVbZR88SOfbMRCnuVF4wEuOF","object":"chat.completion.chunk","created":1696134911,"model":"gpt-3.5-turbo-0613","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-84iSdjVbZR88SOfbMRCnuVF4wEuOF","object":"chat.completion.chunk","created":1696134911,"model":"gpt-3.5-turbo-0613","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}
data: {"id":"chatcmpl-84iSdjVbZR88SOfbMRCnuVF4wEuOF","object":"chat.completion.chunk","created":1696134911,"model":"gpt-3.5-turbo-0613","choices":[{"index":0,"delta":{"content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-84iSdjVbZR88SOfbMRCnuVF4wEuOF","object":"chat.completion.chunk","created":1696134911,"model":"gpt-3.5-turbo-0613","choices":[{"index":0,"delta":{"content":" world"},"finish_reason":null}]}
data: {"id":"chatcmpl-84iSdjVbZR88SOfbMRCnuVF4wEuOF","object":"chat.completion.chunk","created":1696134911,"model":"gpt-3.5-turbo-0613","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}
data: {"id":"chatcmpl-84iSdjVbZR88SOfbMRCnuVF4wEuOF","object":"chat.completion.chunk","created":1696134911,"model":"gpt-3.5-turbo-0613","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
data: [DONE]
# console.log("done");
done
Now we install the js-tiktoken library and set up the encoder, which will help calculate the token count of a text string.
In the project root folder, run:
npm i js-tiktoken
In the index.js
file add this to the top:
const tiktoken = require("js-tiktoken");

const encoding = tiktoken.getEncoding("cl100k_base");
const tokenUsage = {
  prompt_tokens: 0,
  completion_tokens: 0,
  total_tokens: 0,
};
We use the cl100k_base encoding, which is the one used by GPT-3.5 Turbo and GPT-4. We also create an empty tokenUsage object that follows the format of the OpenAI usage field: prompt_tokens will hold the token count of our prompt payload, and completion_tokens will hold the token count of the OpenAI response.
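As a quick illustration, encoding.encode turns a string into an array of token IDs, and the array length is the token count:

// cl100k_base splits our prompt text into 3 tokens: "say", " hello", " world".
const ids = encoding.encode("say hello world");
console.log(ids.length); // 3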
Calculate the token usage in the request prompt
Create a helper function to calculate the token usage of the prompt messages:
// https://platform.openai.com/docs/guides/gpt/managing-tokens
const numTokensFromPrompt = (messages) => {
  let numTokens = 0;
  for (const message of messages) {
    numTokens += 4; // every message follows <im_start>{role/name}\n{content}<im_end>\n
    for (const [key, value] of Object.entries(message)) {
      numTokens += encoding.encode(value).length;
      if (key === "name") numTokens -= 1; // role is always required and always 1 token
    }
  }
  numTokens += 2; // every reply is primed with <im_start>assistant
  return numTokens;
};
Add this line below the payload:
tokenUsage.prompt_tokens = numTokensFromPrompt(payload.messages);
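For our single-message payload, the arithmetic matches the prompt_tokens: 10 that the non-stream response reported earlier:

// { role: "user", content: "say hello world" }
//   4  per-message overhead
// + 1  encoding.encode("user").length
// + 3  encoding.encode("say hello world").length
// + 2  reply priming
// = 10 tokens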
Calculate the token usage in the OpenAI response
Modify the request to calculate the token usage from the response data stream:
const request = https.request(options, (res) => {
  let content = "";
  res.on("data", (chunk) => {
    const jsonString = chunk.toString();
    const prefixToRemove = "data: ";
    const trimmedString = jsonString.slice(prefixToRemove.length);
    try {
      const jsonObject = JSON.parse(trimmedString);
      const deltaContent = jsonObject.choices[0]?.delta.content || "";
      content += deltaContent;
    } catch (error) {
      console.error("Error counting tokens from OpenAI response:", error);
    }
  });
  res.on("end", () => {
    const tokens = encoding.encode(content);
    tokenUsage.completion_tokens = tokens.length;
    if (tokens.length > 0) tokenUsage.completion_tokens += 1; // +1 missing token from first chunk: https://community.openai.com/t/stream-response-from-v1-chat-completions-endpoint-is-missing-the-first-token/187835/7?u=zenbuidler
    tokenUsage.total_tokens = tokenUsage.prompt_tokens + tokenUsage.completion_tokens;
    console.log("tokenUsage", tokenUsage);
  });
});
In this code, we listen to the data event of the response stream, remove the data: prefix from each chunk, parse the rest as JSON, and append the message delta to the content variable for later use.
In the end event of the response stream, we have the full message in content, and we use js-tiktoken to calculate the usage.
Note: We use one hack here: we add 1 token because the first two chunks of data are sent together, so we miss 1 token when parsing them. A more robust approach is to parse the stream by SSE event rather than by network chunk, as sketched below.
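One way to avoid the hack is to buffer the raw stream and split it on the SSE event delimiter (a blank line) before parsing, so events that arrive packed into one network chunk, or split across two, are handled one by one. A sketch of a drop-in replacement for the data handler above (content is the same accumulator declared at the top of the callback):

let buffer = "";
res.on("data", (chunk) => {
  buffer += chunk.toString();
  // SSE events are separated by a blank line; keep any trailing
  // partial event in the buffer until the rest of it arrives.
  const events = buffer.split("\n\n");
  buffer = events.pop();
  for (const event of events) {
    const line = event.trim();
    if (!line.startsWith("data: ")) continue;
    const data = line.slice("data: ".length);
    if (data === "[DONE]") continue;
    try {
      const parsed = JSON.parse(data);
      content += parsed.choices[0]?.delta?.content || "";
    } catch (error) {
      console.error("Error parsing stream event:", error);
    }
  }
});

With per-event parsing nothing is missed, so the + 1 adjustment in the end handler can be dropped.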
Full code in the index.js
file:
const https = require("https");
const tiktoken = require("js-tiktoken");

const encoding = tiktoken.getEncoding("cl100k_base");
const tokenUsage = {
  prompt_tokens: 0,
  completion_tokens: 0,
  total_tokens: 0,
};

// https://platform.openai.com/docs/guides/gpt/managing-tokens
const numTokensFromPrompt = (messages) => {
  let numTokens = 0;
  for (const message of messages) {
    numTokens += 4; // every message follows <im_start>{role/name}\n{content}<im_end>\n
    for (const [key, value] of Object.entries(message)) {
      numTokens += encoding.encode(value).length;
      if (key === "name") numTokens -= 1; // role is always required and always 1 token
    }
  }
  numTokens += 2; // every reply is primed with <im_start>assistant
  return numTokens;
};

const options = {
  hostname: "api.openai.com",
  path: "/v1/chat/completions",
  port: 443,
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
  },
};

const payload = {
  model: "gpt-3.5-turbo",
  messages: [
    {
      role: "user",
      content: "say hello world",
    },
  ],
  temperature: 0.7,
  stream: true,
};

tokenUsage.prompt_tokens = numTokensFromPrompt(payload.messages);

const request = https.request(options, (res) => {
  let content = "";
  res.on("data", (chunk) => {
    const jsonString = chunk.toString();
    const prefixToRemove = "data: ";
    const trimmedString = jsonString.slice(prefixToRemove.length);
    try {
      const jsonObject = JSON.parse(trimmedString);
      const deltaContent = jsonObject.choices[0]?.delta.content || "";
      content += deltaContent;
    } catch (error) {
      console.error("Error counting tokens from OpenAI response:", error);
    }
  });
  res.on("end", () => {
    const tokens = encoding.encode(content);
    tokenUsage.completion_tokens = tokens.length;
    if (tokens.length > 0) tokenUsage.completion_tokens += 1; // +1 missing token from first chunk: https://community.openai.com/t/stream-response-from-v1-chat-completions-endpoint-is-missing-the-first-token/187835/7?u=zenbuidler
    tokenUsage.total_tokens = tokenUsage.prompt_tokens + tokenUsage.completion_tokens;
    console.log("tokenUsage", tokenUsage);
  });
});

request.write(JSON.stringify(payload));
request.end();
Now run the program again:
OPENAI_API_KEY=sk-your-open-ai-key node index.js
We will see the result:
tokenUsage { prompt_tokens: 10, completion_tokens: 4, total_tokens: 14 }
Recap
We have finished the code that calculates the OpenAI stream response usage with the js-tiktoken library.
This code is useful when we want to actively calculate OpenAI token usage ourselves.
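For example, once tokenUsage is filled in, we can turn it into an estimated cost. The per-token rates below are placeholders, so replace them with the current numbers from OpenAI's pricing page:

// Placeholder prices in USD per 1K tokens; check OpenAI's pricing page for real values.
const PROMPT_PRICE_PER_1K = 0.0015;
const COMPLETION_PRICE_PER_1K = 0.002;

const cost =
  (tokenUsage.prompt_tokens / 1000) * PROMPT_PRICE_PER_1K +
  (tokenUsage.completion_tokens / 1000) * COMPLETION_PRICE_PER_1K;
console.log(`Estimated cost: $${cost.toFixed(6)}`);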
Source code can be found here: https://github.com/votanlean/openai-usage
Thank you for reading ^^
See you in the next post!