-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
- Loading branch information
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="utf-8"> | ||
<title>Source: Providers/OpenAI/Tokenizer.php - 10up ClassifAI Hook Docs</title> | ||
|
||
<script src="scripts/prettify/prettify.js"> </script> | ||
<script src="scripts/prettify/lang-css.js"> </script> | ||
<!--[if lt IE 9]> | ||
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> | ||
<![endif]--> | ||
<link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css"> | ||
<link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css"> | ||
|
||
<link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:300,400|Playfair+Display:900&display=swap" rel="stylesheet"> | ||
<link type="text/css" rel="stylesheet" href="styles-10up.css"> | ||
</head> | ||
|
||
<body> | ||
|
||
<div id="main"> | ||
|
||
|
||
<h1 class="page-title">Source: Providers/OpenAI/Tokenizer.php</h1> | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<section> | ||
<article> | ||
<pre class="prettyprint source linenums"><code><?php | ||
/** | ||
* OpenAI Tokenizer | ||
*/ | ||
|
||
namespace Classifai\Providers\OpenAI; | ||
|
||
/**
 * Rough, character-based token estimator for OpenAI requests.
 *
 * Token counts are approximations derived from two ratios (characters per
 * token and tokens per word), both of which can be adjusted through filters
 * when the object is constructed.
 */
class Tokenizer {

	/**
	 * Maximum number of tokens our model supports
	 *
	 * @var int
	 */
	public $max_tokens;

	/**
	 * How many characters in one token (roughly)
	 *
	 * @var int
	 */
	public $characters_in_token = 4;

	/**
	 * How many tokens a word will take (roughly)
	 *
	 * @var float
	 */
	public $tokens_per_word = 1.5;

	/**
	 * OpenAI Tokenizer constructor.
	 *
	 * Stores the model limit and runs both ratios through their filters.
	 *
	 * @param int $max_tokens Maximum tokens the model supports.
	 */
	public function __construct( $max_tokens ) {
		$this->max_tokens = $max_tokens;

		/**
		 * How many characters in one token (roughly)
		 *
		 * @since 2.4.0
		 * @hook classifai_openai_characters_in_token
		 *
		 * @param {int} $characters_in_token How many characters in one token (roughly)
		 * @param {int} $max_tokens Maximum tokens the model supports.
		 *
		 * @return {int}
		 */
		$this->characters_in_token = apply_filters( 'classifai_openai_characters_in_token', $this->characters_in_token, $max_tokens );

		/**
		 * How many tokens a word will take (roughly)
		 *
		 * @since 2.4.0
		 * @hook classifai_openai_tokens_per_word
		 *
		 * @param {float} $tokens_per_word How many tokens a word will take (roughly)
		 * @param {int}   $max_tokens Maximum tokens the model supports.
		 *
		 * @return {float}
		 */
		$this->tokens_per_word = apply_filters( 'classifai_openai_tokens_per_word', $this->tokens_per_word, $max_tokens );
	}

	/**
	 * Determine roughly how many tokens a string contains.
	 *
	 * The estimate is the multibyte character count divided by the
	 * characters-per-token ratio, rounded up.
	 *
	 * @param string $content Content to analyze.
	 * @return int
	 */
	public function tokens_in_content( string $content = '' ) {
		$tokens = ceil( mb_strlen( $content ) / $this->get_characters_in_token() );

		return (int) $tokens;
	}

	/**
	 * Determine how many tokens are in a certain number of words.
	 *
	 * @param int $words Number of words we want. Negative values are made absolute.
	 * @return int
	 */
	public function tokens_in_words( int $words = 1 ) {
		$tokens = ceil( $this->tokens_per_word * absint( $words ) );

		return (int) $tokens;
	}

	/**
	 * Trim our content, if needed, to be under our max token number.
	 *
	 * Content that already fits is returned untouched (apart from double
	 * linebreaks being collapsed to a space). Otherwise the content is cut to
	 * the estimated character budget and, if that cut lands inside a word,
	 * backed up to the previous non-word character.
	 *
	 * @param string $content Content to trim.
	 * @param int    $max_tokens Maximum tokens our content can have.
	 * @return string
	 */
	public function trim_content( string $content = '', int $max_tokens = 0 ) {
		// Remove linebreaks that may have been added.
		$content = str_replace( "\n\n", ' ', $content );

		// Determine how many tokens the content has.
		$content_tokens = $this->tokens_in_content( $content );

		// If the content already fits (including exactly at the limit), return it in full.
		if ( $content_tokens <= $max_tokens ) {
			return $content;
		}

		/**
		 * Next we determine how many tokens we need to trim by taking the
		 * number of tokens in the content and subtracting the max tokens
		 * we can have.
		 *
		 * Then we convert that token number to characters.
		 *
		 * Finally we determine what the max character length our content
		 * can be and trim it up.
		 */
		$tokens_to_trim     = $content_tokens - $max_tokens;
		$characters_to_trim = $tokens_to_trim * $this->get_characters_in_token();
		$max_content_length = (int) ( mb_strlen( $content ) - $characters_to_trim );

		// Token counts are rounded up, so the budget can reach zero or go
		// negative. mb_substr() would read a negative length as "drop N
		// characters from the end", so bail out with nothing instead.
		if ( $max_content_length <= 0 ) {
			return '';
		}

		$trimmed_content = mb_substr( $content, 0, $max_content_length );

		// Ensure our final string ends on a full word instead of truncating in the middle.
		if ( ! preg_match( '/\\W/u', mb_substr( $content, $max_content_length - 1, 2 ) ) ) {
			// The "s" modifier lets "." cross any remaining single linebreaks,
			// so we back up to the last non-word character of the whole string
			// rather than only of its first line.
			if ( preg_match( '/.*\\W/us', $trimmed_content, $matches ) ) {
				$trimmed_content = $matches[0];
			}
		}

		return trim( $trimmed_content );
	}

	/**
	 * Get a characters-per-token ratio that is safe to divide by.
	 *
	 * The property is public and filterable, so it may end up as zero,
	 * negative or non-numeric; in that case fall back to the default of 4.
	 *
	 * @return int|float
	 */
	private function get_characters_in_token() {
		$characters = $this->characters_in_token;

		if ( is_numeric( $characters ) && $characters > 0 ) {
			return $characters + 0;
		}

		return 4;
	}

}
</code></pre> | ||
</article> | ||
</section> | ||
|
||
|
||
|
||
|
||
|
||
<footer> | ||
<a href="https://classifaiplugin.com/">ClassifAI Plugin</a> • | ||
<a href="https://github.com/10up/classifai/">ClassifAI on GitHub</a> • | ||
<a href="https://10up.com/careers">Careers at 10up</a> | ||
</footer> | ||
|
||
|
||
</div> | ||
|
||
<nav> | ||
<h2><a href="index.html">Home</a></h2><h3>Actions</h3><ul><li><a href="after_classifai_init.html">after_classifai_init</a></li><li><a href="before_classifai_init.html">before_classifai_init</a></li><li><a href="classifai_azure_read_after_request.html">classifai_azure_read_after_request</a></li><li><a href="classifai_computer_vision_caption_failed.html">classifai_computer_vision_caption_failed</a></li><li><a href="classifai_computer_vision_image_tag_failed.html">classifai_computer_vision_image_tag_failed</a></li><li><a href="classifai_ocr_after_request.html">classifai_ocr_after_request</a></li><li><a href="classifai_ocr_unsuccessful_response.html">classifai_ocr_unsuccessful_response</a></li><li><a href="classifai_smart_cropping_after_request.html">classifai_smart_cropping_after_request</a></li><li><a href="classifai_smart_cropping_unsuccessful_response.html">classifai_smart_cropping_unsuccessful_response</a></li></ul><h3>Filters</h3><ul><li><a href="classifai_all_post_statuses.html">classifai_all_post_statuses</a></li><li><a href="classifai_audio_generation_initial_state.html">classifai_audio_generation_initial_state</a></li><li><a href="classifai_audio_generation_subsequent_state.html">classifai_audio_generation_subsequent_state</a></li><li><a href="classifai_azure_read_request_args.html">classifai_azure_read_request_args</a></li><li><a href="classifai_azure_read_result_max_page.html">classifai_azure_read_result_max_page</a></li><li><a href="classifai_azure_read_retry_interval.html">classifai_azure_read_retry_interval</a></li><li><a href="classifai_azure_read_should_process.html">classifai_azure_read_should_process</a></li><li><a href="classifai_azure_read_text_result.html">classifai_azure_read_text_result</a></li><li><a href="classifai_chatgpt_allowed_roles.html">classifai_chatgpt_allowed_roles</a></li><li><a href="classifai_chatgpt_content.html">classifai_chatgpt_content</a></li><li><a 
href="classifai_chatgpt_excerpt_prompt.html">classifai_chatgpt_excerpt_prompt</a></li><li><a href="classifai_chatgpt_excerpt_request_body.html">classifai_chatgpt_excerpt_request_body</a></li><li><a href="classifai_chatgpt_resize_content_request_body.html">classifai_chatgpt_resize_content_request_body</a></li><li><a href="classifai_chatgpt_title_prompt.html">classifai_chatgpt_title_prompt</a></li><li><a href="classifai_chatgpt_title_request_body.html">classifai_chatgpt_title_request_body</a></li><li><a href="classifai_classified_data.html">classifai_classified_data</a></li><li><a href="classifai_computer_vision_captions.html">classifai_computer_vision_captions</a></li><li><a href="classifai_computer_vision_image_tags.html">classifai_computer_vision_image_tags</a></li><li><a href="classifai_computer_vision_max_filesize.html">classifai_computer_vision_max_filesize</a></li><li><a href="classifai_dalle_caption.html">classifai_dalle_caption</a></li><li><a href="classifai_dalle_prompt.html">classifai_dalle_prompt</a></li><li><a href="classifai_dalle_request_body.html">classifai_dalle_request_body</a></li><li><a href="classifai_debug_information.html">classifai_debug_information</a></li><li><a href="classifai_disable_post_to_audio_block.html">classifai_disable_post_to_audio_block</a></li><li><a href="classifai_feature_threshold.html">classifai_feature_threshold</a></li><li><a href="classifai_generate_image_alt_tags_source_url.html">classifai_generate_image_alt_tags_source_url</a></li><li><a href="classifai_language_settings_post_statuses.html">classifai_language_settings_post_statuses</a></li><li><a href="classifai_language_settings_post_types.html">classifai_language_settings_post_types</a></li><li><a href="classifai_listen_to_this_post_text.html">classifai_listen_to_this_post_text</a></li><li><a href="classifai_normalize.html">classifai_normalize</a></li><li><a href="classifai_ocr_approved_media_types.html">classifai_ocr_approved_media_types</a></li><li><a 
href="classifai_ocr_should_process.html">classifai_ocr_should_process</a></li><li><a href="classifai_ocr_tag_confidence.html">classifai_ocr_tag_confidence</a></li><li><a href="classifai_ocr_tags.html">classifai_ocr_tags</a></li><li><a href="classifai_ocr_text.html">classifai_ocr_text</a></li><li><a href="classifai_ocr_text_post_args.html">classifai_ocr_text_post_args</a></li><li><a href="classifai_openai_api_request_get_options.html">classifai_openai_api_request_get_options</a></li><li><a href="classifai_openai_api_request_get_url.html">classifai_openai_api_request_get_url</a></li><li><a href="classifai_openai_api_request_post_form_options.html">classifai_openai_api_request_post_form_options</a></li><li><a href="classifai_openai_api_request_post_form_url.html">classifai_openai_api_request_post_form_url</a></li><li><a href="classifai_openai_api_request_post_options.html">classifai_openai_api_request_post_options</a></li><li><a href="classifai_openai_api_request_post_url.html">classifai_openai_api_request_post_url</a></li><li><a href="classifai_openai_api_response_get.html">classifai_openai_api_response_get</a></li><li><a href="classifai_openai_api_response_post.html">classifai_openai_api_response_post</a></li><li><a href="classifai_openai_api_response_post_form.html">classifai_openai_api_response_post_form</a></li><li><a href="classifai_openai_characters_in_token.html">classifai_openai_characters_in_token</a></li><li><a href="classifai_openai_chatgpt_%257B$feature%257D.html">classifai_openai_chatgpt_{$feature}</a></li><li><a href="classifai_openai_dalle_allowed_image_roles.html">classifai_openai_dalle_allowed_image_roles</a></li><li><a href="classifai_openai_dalle_enable_image_gen.html">classifai_openai_dalle_enable_image_gen</a></li><li><a href="classifai_openai_embeddings_content.html">classifai_openai_embeddings_content</a></li><li><a href="classifai_openai_embeddings_post_statuses.html">classifai_openai_embeddings_post_statuses</a></li><li><a 
href="classifai_openai_embeddings_request_body.html">classifai_openai_embeddings_request_body</a></li><li><a href="classifai_openai_embeddings_should_classify.html">classifai_openai_embeddings_should_classify</a></li><li><a href="classifai_openai_embeddings_taxonomies.html">classifai_openai_embeddings_taxonomies</a></li><li><a href="classifai_openai_settings_post_statuses.html">classifai_openai_settings_post_statuses</a></li><li><a href="classifai_openai_settings_post_types.html">classifai_openai_settings_post_types</a></li><li><a href="classifai_openai_settings_taxonomies.html">classifai_openai_settings_taxonomies</a></li><li><a href="classifai_openai_tokens_per_word.html">classifai_openai_tokens_per_word</a></li><li><a href="classifai_post_statuses.html">classifai_post_statuses</a></li><li><a href="classifai_post_statuses_for_post_type_or_id.html">classifai_post_statuses_for_post_type_or_id</a></li><li><a href="classifai_post_types.html">classifai_post_types</a></li><li><a href="classifai_pre_render_post_audio_controls.html">classifai_pre_render_post_audio_controls</a></li><li><a href="classifai_recommended_block_attributes.html">classifai_recommended_block_attributes</a></li><li><a href="classifai_recommended_block_markup.html">classifai_recommended_block_markup</a></li><li><a href="classifai_recommended_content_post_args.html">classifai_recommended_content_post_args</a></li><li><a href="classifai_rest_bases.html">classifai_rest_bases</a></li><li><a href="classifai_services.html">classifai_services</a></li><li><a href="classifai_should_classify_post.html">classifai_should_classify_post</a></li><li><a href="classifai_should_crop_size.html">classifai_should_crop_size</a></li><li><a href="classifai_should_ocr_scan_image.html">classifai_should_ocr_scan_image</a></li><li><a href="classifai_should_register_save_post_handler.html">classifai_should_register_save_post_handler</a></li><li><a 
href="classifai_should_smart_crop_image.html">classifai_should_smart_crop_image</a></li><li><a href="classifai_smart_crop_max_pixel_dimension.html">classifai_smart_crop_max_pixel_dimension</a></li><li><a href="classifai_smart_crop_wp_filesystem.html">classifai_smart_crop_wp_filesystem</a></li><li><a href="classifai_smart_cropping_source_url.html">classifai_smart_cropping_source_url</a></li><li><a href="classifai_smart_cropping_thumb_file_name.html">classifai_smart_cropping_thumb_file_name</a></li><li><a href="classifai_taxonomy_for_feature.html">classifai_taxonomy_for_feature</a></li><li><a href="classifai_whisper_transcribe_request_body.html">classifai_whisper_transcribe_request_body</a></li><li><a href="classifai_whisper_transcribe_result.html">classifai_whisper_transcribe_result</a></li><li><a href="%257B$this-_menu_slug%257D_providers.html">{$this->menu_slug}_providers</a></li></ul><h3>Tutorials</h3><ul><li><a href="tutorial-useful-snippets.html">Useful snippets</a></li><li><a href="tutorial-wp-cli.html">WP-CLI Commands</a></li></ul> | ||
</nav> | ||
|
||
<br class="clear"> | ||
|
||
<script> prettyPrint(); </script> | ||
<script src="scripts/linenumber.js"> </script> | ||
</body> | ||
</html> |