From 2b2c679905600b5fda9acc06026341a7ba2f41e6 Mon Sep 17 00:00:00 2001
From: ParthSareen
Date: Mon, 16 Dec 2024 10:01:08 -0800
Subject: [PATCH] Tokenize Detokenize compatibility

---
 examples/tokenization/tokenization.ts | 21 +++++++++++++++++
 src/browser.ts                        | 34 ++++++++++++++++++++++++++-
 src/interfaces.ts                     | 18 ++++++++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 examples/tokenization/tokenization.ts

diff --git a/examples/tokenization/tokenization.ts b/examples/tokenization/tokenization.ts
new file mode 100644
index 0000000..1878444
--- /dev/null
+++ b/examples/tokenization/tokenization.ts
@@ -0,0 +1,21 @@
+import ollama from '../../src/browser.js'
+
+async function main() {
+  // Tokenize some text
+  const tokResponse = await ollama.tokenize({
+    model: 'llama3.2',
+    text: 'Hello, how are you?'
+  })
+
+  console.log('Tokens from model:', tokResponse.tokens)
+
+  // Detokenize the tokens back to text
+  const detokResponse = await ollama.detokenize({
+    model: 'llama3.2',
+    tokens: tokResponse.tokens
+  })
+
+  console.log('Text from model:', detokResponse.text)
+}
+
+main().catch(console.error)
diff --git a/src/browser.ts b/src/browser.ts
index 6d8dcc6..6860631 100644
--- a/src/browser.ts
+++ b/src/browser.ts
@@ -9,6 +9,8 @@ import type {
   CopyRequest,
   CreateRequest,
   DeleteRequest,
+  DetokenizeRequest,
+  DetokenizeResponse,
   EmbedRequest,
   EmbedResponse,
   EmbeddingsRequest,
@@ -24,6 +26,8 @@ import type {
   ShowRequest,
   ShowResponse,
   StatusResponse,
+  TokenizeRequest,
+  TokenizeResponse,
 } from './interfaces.js'
 
 export class Ollama {
@@ -322,9 +326,37 @@ async encodeImage(image: Uint8Array | string): Promise<string> {
     })
     return (await response.json()) as ListResponse
   }
-}
+  /**
+   * Tokenizes text into tokens.
+   * @param request {TokenizeRequest} - The request object.
+   * @returns {Promise<TokenizeResponse>} - The response object.
+   */
+  async tokenize(request: TokenizeRequest): Promise<TokenizeResponse> {
+    const response = await utils.post(this.fetch, `${this.config.host}/api/tokenize`, {
+      ...request,
+    }, {
+      headers: this.config.headers
+    })
+    return (await response.json()) as TokenizeResponse
+  }
+
+  /**
+   * Detokenizes tokens back into text.
+   * @param request {DetokenizeRequest} - The request object.
+   * @returns {Promise<DetokenizeResponse>} - The response object.
+   */
+  async detokenize(request: DetokenizeRequest): Promise<DetokenizeResponse> {
+    const response = await utils.post(this.fetch, `${this.config.host}/api/detokenize`, {
+      ...request,
+    }, {
+      headers: this.config.headers
+    })
+    return (await response.json()) as DetokenizeResponse
+  }
+}
 
 export default new Ollama()
 
 // export all types from the main entry point so that packages importing types dont need to specify paths
 export * from './interfaces.js'
+
diff --git a/src/interfaces.ts b/src/interfaces.ts
index a941c9f..d99140e 100644
--- a/src/interfaces.ts
+++ b/src/interfaces.ts
@@ -159,6 +159,16 @@ export interface EmbeddingsRequest {
   options?: Partial<Options>
 }
 
+export interface TokenizeRequest {
+  model: string
+  text: string
+}
+
+export interface DetokenizeRequest {
+  model: string
+  tokens: Int32Array
+}
+
 // response types
 
 export interface GenerateResponse {
@@ -242,6 +252,14 @@ export interface ListResponse {
   models: ModelResponse[]
 }
 
+export interface TokenizeResponse {
+  tokens: Int32Array
+}
+
+export interface DetokenizeResponse {
+  text: string
+}
+
 export interface ErrorResponse {
   error: string
 }
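
A note on the wire types in this patch: TokenizeResponse.tokens and DetokenizeRequest.tokens are declared as Int32Array, but the transport is JSON. At runtime response.json() yields a plain number[], and JSON.stringify serializes a typed array as an index-keyed object ({"0":…,"1":…}) rather than a JSON array, so callers who materialize a real Int32Array need to convert at the boundary. Below is a minimal round-trip sketch, assuming this patch is applied and that the server expects a plain JSON array of token ids; the roundTrip helper and the model name are illustrative, not part of the patch.

import ollama from 'ollama/browser'

// Hypothetical helper: tokenize text, then detokenize the ids back to text.
async function roundTrip(text: string): Promise<string> {
  const tok = await ollama.tokenize({ model: 'llama3.2', text })

  // Despite the Int32Array annotation, the parsed JSON is a plain array at
  // runtime; coerce explicitly if typed-array semantics are needed downstream.
  const ids = new Int32Array(tok.tokens)

  const detok = await ollama.detokenize({
    model: 'llama3.2',
    // Convert back to a plain array before serialization: JSON.stringify on
    // an Int32Array would not produce a JSON array. The cast satisfies the
    // declared request type.
    tokens: Array.from(ids) as unknown as Int32Array,
  })
  return detok.text
}

An alternative the patch could adopt is declaring tokens as number[] in both interfaces, which matches what actually crosses the wire and removes the cast above.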