Skip to content

Commit

Permalink
Add Knn and Replace plugins to Core (#198)
Browse files Browse the repository at this point in the history
* Add Knn and Replace plugins to Core
  • Loading branch information
djklim87 authored Jan 24, 2024
1 parent 5910a9f commit 8cfed72
Show file tree
Hide file tree
Showing 6 changed files with 560 additions and 1 deletion.
2 changes: 1 addition & 1 deletion APP_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.1.1
2.1.3
183 changes: 183 additions & 0 deletions src/Plugin/Knn/Handler.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
<?php declare(strict_types=1);

/*
Copyright (c) 2023, Manticore Software LTD (https://manticoresearch.com)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 or any later
version. You should have received a copy of the GPL license along with this
program; if you did not, you can find it at http://www.gnu.org/
*/

namespace Manticoresearch\Buddy\Base\Plugin\Knn;

use Manticoresearch\Buddy\Core\Error\ManticoreSearchClientError;
use Manticoresearch\Buddy\Core\Error\ManticoreSearchResponseError;
use Manticoresearch\Buddy\Core\ManticoreSearch\Client;
use Manticoresearch\Buddy\Core\ManticoreSearch\Endpoint;
use Manticoresearch\Buddy\Core\Plugin\BaseHandlerWithClient;
use Manticoresearch\Buddy\Core\Task\Task;
use Manticoresearch\Buddy\Core\Task\TaskResult;
use RuntimeException;

final class Handler extends BaseHandlerWithClient
{
	/**
	 * Initialize the executor
	 *
	 * @param Payload $payload
	 * @return void
	 */
	public function __construct(public Payload $payload) {
	}

	/**
	 * Process the request: look up the vector stored in the reference document
	 * and run a KNN search with it, excluding the reference document itself
	 *
	 * @return Task
	 * @throws RuntimeException
	 */
	public function run(): Task {
		$taskFn = static function (Payload $payload, Client $manticoreClient): TaskResult {
			$knnField = self::getKnnField($manticoreClient, $payload);
			$queryVector = self::getQueryVectorValue($manticoreClient, $payload, $knnField);

			// Without a reference document there is no vector to search with
			if ($queryVector === false) {
				return TaskResult::none();
			}
			return TaskResult::raw(self::getKnnResult($manticoreClient, $payload, $queryVector));
		};

		return Task::create(
			$taskFn, [$this->payload, $this->manticoreClient]
		)->run();
	}

	/**
	 * Find the float_vector column that holds the query vector.
	 *
	 * The column named in the request is preferred, so that on tables with
	 * several float_vector columns the vector is read from the same column the
	 * KNN search runs against. When the requested name does not match any
	 * float_vector column, the last such column is used (previous behaviour).
	 *
	 * @param Client $manticoreClient
	 * @param Payload $payload
	 * @return string
	 * @throws ManticoreSearchClientError
	 * @throws ManticoreSearchResponseError
	 */
	private static function getKnnField(Client $manticoreClient, Payload $payload): string {
		$descResult = $manticoreClient
			->sendRequest('DESC '.$payload->table)
			->getResult();

		if (!is_array($descResult) || empty($descResult[0]['data'])) {
			throw ManticoreSearchClientError::create('Manticore didn\'t answer');
		}

		// The SQL parser may leave whitespace or backticks around the name
		$requestedField = trim((string)$payload->field, " \t\n\r\0\x0B`");
		$knnField = false;

		foreach ($descResult[0]['data'] as $field) {
			if ($field['Type'] !== 'float_vector') {
				continue;
			}

			$knnField = $field['Field'];
			if ($requestedField !== '' && strcasecmp((string)$knnField, $requestedField) === 0) {
				break;
			}
		}

		if (!$knnField) {
			throw ManticoreSearchResponseError::create('Table '.$payload->table.' didnt have any KNN fields');
		}

		return $knnField;
	}

	/**
	 * Fetch the reference document and return the value of its vector column
	 *
	 * @param Client $client
	 * @param Payload $payload
	 * @param string $knnField
	 * @return string|false the stored vector, or false when the document is not found
	 */
	private static function getQueryVectorValue(Client $client, Payload $payload, string $knnField): string|false {
		// The id is concatenated into the SQL text and, on the JSON endpoint, comes
		// straight from the request body, so anything but a plain integer is refused
		if (!preg_match('/^-?[0-9]+\z/', (string)$payload->docId)) {
			return false;
		}

		$document = $client
			->sendRequest('SELECT * FROM '.$payload->table.' WHERE id = '.$payload->docId)
			->getResult();

		if (is_array($document) && !empty($document[0]['data'])) {
			return $document[0]['data'][0][$knnField];
		}

		return false;
	}

	/**
	 * Run the KNN search through the endpoint the original request came from
	 *
	 * @param Client $manticoreClient
	 * @param Payload $payload
	 * @param string $queryVector
	 * @return array<mixed>
	 * @throws ManticoreSearchClientError
	 */
	private static function getKnnResult(Client $manticoreClient, Payload $payload, string $queryVector): array {
		if ($payload->endpointBundle === Endpoint::Search) {
			return self::knnHttpQuery($manticoreClient, $payload, $queryVector);
		}

		return self::knnSqlQuery($manticoreClient, $payload, $queryVector);
	}

	/**
	 * Run the KNN search as a JSON search request and drop the reference
	 * document from the returned hits
	 *
	 * @param Client $manticoreClient
	 * @param Payload $payload
	 * @param string $queryVector
	 * @return array<mixed>
	 * @throws ManticoreSearchClientError
	 */
	private static function knnHttpQuery(Client $manticoreClient, Payload $payload, string $queryVector): array {
		$query = [
			'index' => $payload->table,
			'knn' => [
				'field' => $payload->field,
				'k' => (int)$payload->k,
				'query_vector' => array_map(
					static fn ($val): float => (float)$val,
					explode(',', $queryVector)
				),
			],
		];

		if ($payload->select !== ['*']) {
			$query['_source'] = $payload->select;
		}

		$result = $manticoreClient
			->sendRequest((string)json_encode($query), Endpoint::Search->value)
			->getResult();

		if (isset($result['hits']['hits']) && is_array($result['hits']['hits'])) {
			// array_values() keeps the hits a list; a gap in the keys would make
			// json_encode() emit an object instead of an array
			$result['hits']['hits'] = array_values(
				array_filter(
					$result['hits']['hits'],
					static fn ($hit): bool => !is_array($hit)
						|| !self::isSameDocId($hit['_id'] ?? null, $payload->docId)
				)
			);
		}

		return $result;
	}

	/**
	 * Run the KNN search as an SQL query and drop the reference document from
	 * the returned rows
	 *
	 * @param Client $manticoreClient
	 * @param Payload $payload
	 * @param string $queryVector
	 * @return array<mixed>
	 * @throws ManticoreSearchClientError
	 */
	private static function knnSqlQuery(Client $manticoreClient, Payload $payload, string $queryVector): array {
		$query = 'SELECT '.implode(',', $payload->select).' FROM '.$payload->table.' WHERE '.
			'knn ('.$payload->field.", $payload->k, ($queryVector))";

		$result = $manticoreClient
			->sendRequest($query)
			->getResult();

		if (isset($result[0]['data']) && is_array($result[0]['data'])) {
			// Reindex after filtering so the rows are still encoded as a JSON list
			$result[0]['data'] = array_values(
				array_filter(
					$result[0]['data'],
					static fn ($row): bool => !is_array($row)
						|| !self::isSameDocId($row['id'] ?? null, $payload->docId)
				)
			);
		}

		return $result;
	}

	/**
	 * Tell whether an id taken from a response row refers to the reference
	 * document, regardless of whether the id arrived as an integer or a string
	 *
	 * @param mixed $rowId
	 * @param ?string $docId
	 * @return bool
	 */
	private static function isSameDocId(mixed $rowId, ?string $docId): bool {
		if ($docId === null || !is_scalar($rowId)) {
			return false;
		}

		return self::normalizeDocId((string)$rowId) === self::normalizeDocId($docId);
	}

	/**
	 * Bring a document id to a canonical form: surrounding whitespace removed
	 * and, for plain digit strings, leading zeros stripped ("007" => "7")
	 *
	 * @param string $id
	 * @return string
	 */
	private static function normalizeDocId(string $id): string {
		$id = trim($id);
		if (!preg_match('/^[0-9]+\z/', $id)) {
			return $id;
		}

		$stripped = ltrim($id, '0');
		return $stripped === '' ? '0' : $stripped;
	}
}
107 changes: 107 additions & 0 deletions src/Plugin/Knn/Payload.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?php declare(strict_types=1);

/*
Copyright (c) 2023, Manticore Software LTD (https://manticoresearch.com)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 or any later
version. You should have received a copy of the GPL license along with this
program; if you did not, you can find it at http://www.gnu.org/
*/

namespace Manticoresearch\Buddy\Base\Plugin\Knn;

use Manticoresearch\Buddy\Core\ManticoreSearch\Endpoint;
use Manticoresearch\Buddy\Core\Network\Request;
use Manticoresearch\Buddy\Core\Plugin\BasePayload;

/**
* Payload of a KNN-by-document-id query: holds the table, vector field, k and
* reference document id parsed from either an SQL query or a JSON search request
*/
final class Payload extends BasePayload
{
	/** Name of the vector field the KNN search runs against */
	public ?string $field = null;

	/** Number of neighbours to return, kept as the string taken from the request */
	public ?string $k = null;

	/** Id of the document whose stored vector is used as the query vector */
	public ?string $docId = null;

	/** @var array<string> */
	public array $select = [];

	public ?string $table = null;

	public Endpoint $endpointBundle;

	/**
	 * Build the payload from either a JSON search request or an SQL query
	 *
	 * @param Request $request
	 * @return static
	 */
	public static function fromRequest(Request $request): static {
		$self = new static();

		$self->endpointBundle = $request->endpointBundle;
		// If we need process this query as http request
		if ($self->endpointBundle === Endpoint::Search) {
			$self->select = ['id', 'knn_dist()'];

			$payload = json_decode($request->payload, true);
			if (is_array($payload)) {
				if (isset($payload['_source'])) {
					// _source may also arrive as a single string or an object;
					// the cast keeps the typed array property from raising a TypeError
					$self->select = (array)$payload['_source'];
				}

				// hasMatch() only guarantees knn.doc_id, so every other key is
				// read defensively instead of assuming it is present
				$knn = is_array($payload['knn'] ?? null) ? $payload['knn'] : [];
				$self->table = self::toNullableString($payload['index'] ?? null);
				$self->field = self::toNullableString($knn['field'] ?? null);
				$self->k = self::toNullableString($knn['k'] ?? null);
				$self->docId = self::toNullableString($knn['doc_id'] ?? null);
			}

			return $self;
		}

		$matches = self::getMatches($request);
		if (!is_array($matches)) {
			$matches = [];
		}

		$self->select = array_map(
			static fn (string $row): string => trim($row),
			explode(',', $matches[1] ?? '')
		);
		$self->table = $matches[2] ?? null;
		// The greedy field group of the pattern can capture trailing whitespace
		$self->field = isset($matches[3]) ? trim($matches[3]) : null;
		$self->k = $matches[4] ?? null;
		$self->docId = $matches[5] ?? null;

		return $self;
	}

	/**
	 * Check whether the request is a KNN-by-document-id query
	 *
	 * @param Request $request
	 * @return bool
	 */
	public static function hasMatch(Request $request): bool {
		if ($request->endpointBundle === Endpoint::Search) {
			$payload = json_decode($request->payload, true);
			if (is_array($payload) && isset($payload['knn']['doc_id'])) {
				return true;
			}
		}

		// Cheap substring test first, the full pattern only when "knn" is present
		if (stripos($request->payload, 'knn') === false) {
			return false;
		}

		return (bool)self::getMatches($request);
	}

	/**
	 * Match the SQL form "SELECT ... FROM table ... knn (field, k, doc_id)".
	 * Capture groups: 1 - select list, 2 - table, 3 - field, 4 - k, 5 - document id
	 *
	 * @param Request $request
	 * @return array<string>|bool the captured groups, or false when the query does not match
	 */
	private static function getMatches(Request $request): array|bool {
		$pattern = '/^select\s+(.*)from\s+`*([a-z0-9_-]+)`*\s+'.
			'.*?knn\s+\(\s*(.*)?\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*\)/usi';
		if (!preg_match($pattern, $request->payload, $matches)) {
			return false;
		}

		return $matches;
	}

	/**
	 * Convert a decoded JSON value to a string, or null when it is absent or
	 * not a scalar
	 *
	 * @param mixed $value
	 * @return ?string
	 */
	private static function toNullableString(mixed $value): ?string {
		return is_scalar($value) ? (string)$value : null;
	}
}
Loading

0 comments on commit 8cfed72

Please sign in to comment.