Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Onboard neural sparse search #141

Merged
merged 6 commits into from
Apr 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
MODEL_ALGORITHM,
PRETRAINED_MODEL_FORMAT,
PretrainedSentenceTransformer,
PretrainedSparseEncodingModel,
WORKFLOW_STATE,
} from './interfaces';

Expand Down Expand Up @@ -61,11 +62,15 @@ export const CREATE_INGEST_PIPELINE_STEP_TYPE = 'create_ingest_pipeline';
export const CREATE_INDEX_STEP_TYPE = 'create_index';
export const REGISTER_LOCAL_PRETRAINED_MODEL_STEP_TYPE =
'register_local_pretrained_model';
export const REGISTER_LOCAL_SPARSE_ENCODING_MODEL_STEP_TYPE =
'register_local_sparse_encoding_model';

/**
* ML PLUGIN PRETRAINED MODELS
* (based off of https://opensearch.org/docs/latest/ml-commons-plugin/pretrained-models/#sentence-transformers)
* (based off of https://opensearch.org/docs/latest/ml-commons-plugin/pretrained-models)
*/

// ---- SENTENCE TRANSFORMERS ----
export const ROBERTA_SENTENCE_TRANSFORMER = {
name: 'huggingface/sentence-transformers/all-distilroberta-v1',
shortenedName: 'all-distilroberta-v1',
Expand Down Expand Up @@ -96,6 +101,34 @@ export const BERT_SENTENCE_TRANSFORMER = {
vectorDimensions: 768,
} as PretrainedSentenceTransformer;

// ---- SPARSE ENCODERS ----
/**
 * Definition of the pretrained `opensearch-neural-sparse-encoding-v1` model:
 * TorchScript format, sparse-encoding algorithm, version 1.0.1.
 *
 * NOTE(review): the object is cast with `as`, so the compiler will not report
 * missing or misspelled fields of PretrainedSparseEncodingModel here.
 */
export const NEURAL_SPARSE_TRANSFORMER = {
// Fully-qualified model name.
name: 'amazon/neural-sparse/opensearch-neural-sparse-encoding-v1',
// Last path segment of `name`.
shortenedName: 'opensearch-neural-sparse-encoding-v1',
description: 'A general neural sparse encoding model',
format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
algorithm: MODEL_ALGORITHM.SPARSE_ENCODING,
version: '1.0.1',
} as PretrainedSparseEncodingModel;

/**
 * Definition of the pretrained `opensearch-neural-sparse-encoding-doc-v1`
 * model: TorchScript format, sparse-encoding algorithm, version 1.0.1.
 *
 * NOTE(review): `description` is identical to the one used for
 * NEURAL_SPARSE_TRANSFORMER, so the two models cannot be told apart by
 * description alone — confirm whether a doc-specific description is wanted.
 */
export const NEURAL_SPARSE_DOC_TRANSFORMER = {
// Fully-qualified model name.
name: 'amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1',
// Last path segment of `name`.
shortenedName: 'opensearch-neural-sparse-encoding-doc-v1',
description: 'A general neural sparse encoding model',
format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
algorithm: MODEL_ALGORITHM.SPARSE_ENCODING,
version: '1.0.1',
} as PretrainedSparseEncodingModel;

/**
 * Definition of the pretrained `opensearch-neural-sparse-tokenizer-v1` model:
 * TorchScript format, version 1.0.1.
 *
 * NOTE(review): the tokenizer is tagged with the SPARSE_ENCODING algorithm,
 * the same as the encoder models — verify against the ML Commons pretrained
 * model docs whether tokenizers need a distinct algorithm value.
 */
export const NEURAL_SPARSE_TOKENIZER_TRANSFORMER = {
// Fully-qualified model name.
name: 'amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1',
// Last path segment of `name`.
shortenedName: 'opensearch-neural-sparse-tokenizer-v1',
description: 'A neural sparse tokenizer model',
format: PRETRAINED_MODEL_FORMAT.TORCH_SCRIPT,
algorithm: MODEL_ALGORITHM.SPARSE_ENCODING,
version: '1.0.1',
} as PretrainedSparseEncodingModel;

/**
* MISCELLANEOUS
*/
Expand Down
26 changes: 25 additions & 1 deletion common/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import { Node, Edge } from 'reactflow';
import { IComponentData } from '../public/component_types';
import { COMPONENT_CLASS } from '../public/utils';

export type Index = {
name: string;
Expand All @@ -16,7 +17,11 @@ export type Index = {
*/

export type ReactFlowComponent = Node<IComponentData>;
export type ReactFlowEdge = Edge<{}> & {};
/**
 * A reactflow `Edge` extended with a string key and two lists of component
 * classes, one for the source side and one for the target side.
 */
export type ReactFlowEdge = Edge<{}> & {
key: string;
// presumably the classes of the component the edge starts from — TODO confirm
sourceClasses: COMPONENT_CLASS[];
// presumably the classes of the component the edge ends at — TODO confirm
targetClasses: COMPONENT_CLASS[];
};

type ReactFlowViewport = {
x: number;
Expand Down Expand Up @@ -49,6 +54,22 @@ export type TextEmbeddingProcessor = IngestProcessor & {
};
};

/**
 * An ingest processor that carries a `sparse_encoding` entry made up of a
 * model ID and a field map.
 */
export type SparseEncodingProcessor = IngestProcessor & {
sparse_encoding: {
model_id: string;
// NOTE(review): typed as `{}`, so the map's shape is unchecked; presumably
// maps input text fields to output vector fields — confirm.
field_map: {};
};
};

/**
 * Index configuration consisting of a settings object and the index mappings.
 */
export type IndexConfiguration = {
// NOTE(review): typed as `{}`, so the contents of the settings are unchecked.
settings: {};
mappings: IndexMappings;
};

/**
 * Index mappings, holding a single `properties` object.
 */
export type IndexMappings = {
// NOTE(review): typed as `{}`, so the individual property entries are unchecked.
properties: {};
};

export type TemplateNode = {
id: string;
type: string;
Expand Down Expand Up @@ -135,6 +156,7 @@ export type Workflow = WorkflowTemplate & {

/**
 * Supported use cases. Each member's value is the same string as its name.
 */
export enum USE_CASE {
SEMANTIC_SEARCH = 'SEMANTIC_SEARCH',
NEURAL_SPARSE_SEARCH = 'NEURAL_SPARSE_SEARCH',
}

/**
Expand Down Expand Up @@ -196,6 +218,8 @@ export type PretrainedSentenceTransformer = PretrainedModel & {
vectorDimensions: number;
};

/**
 * A pretrained sparse encoding model. Adds no fields on top of
 * PretrainedModel; the alias gives sparse encoding models their own type name.
 */
export type PretrainedSparseEncodingModel = PretrainedModel & {};

export type ModelConfig = {
modelType?: string;
embeddingDimension?: number;
Expand Down
1 change: 1 addition & 0 deletions public/component_types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
export * from './interfaces';
export * from './transformer';
export * from './indexer';
export * from './other';
6 changes: 3 additions & 3 deletions public/component_types/indexer/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ export class Indexer extends BaseComponent {
this.baseClasses = [this.type];
this.inputs = [
{
id: 'transformer',
label: 'Transformer',
baseClass: COMPONENT_CLASS.TRANSFORMER,
id: 'document',
label: 'Document',
baseClass: COMPONENT_CLASS.DOCUMENT,
acceptMultiple: false,
},
];
Expand Down
2 changes: 1 addition & 1 deletion public/component_types/indexer/knn_indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export class KnnIndexer extends Indexer {
constructor() {
super();
this.type = COMPONENT_CLASS.KNN_INDEXER;
this.label = 'K-NN Indexer';
this.label = 'K-NN Index';
this.description = 'A specialized indexer for K-NN indices';
this.baseClasses = [...this.baseClasses, this.type];
this.createFields = [
Expand Down
30 changes: 30 additions & 0 deletions public/component_types/other/document.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

import { COMPONENT_CATEGORY, COMPONENT_CLASS } from '../../utils';
import { BaseComponent } from '../base_component';

/**
 * Placeholder UI component representing a document to be ingested.
 * It carries no functionality of its own: it declares no inputs, sets
 * `allowsCreation` to false, and exposes one output that shares the
 * component's label and base classes.
 */
export class Document extends BaseComponent {
  constructor() {
    super();

    const documentLabel = 'Document';
    const documentClass = COMPONENT_CLASS.DOCUMENT;
    // A single array instance is shared by the component and its output.
    const documentBaseClasses = [documentClass];

    this.type = documentClass;
    this.label = documentLabel;
    this.description = 'A document to be ingested';
    this.categories = [COMPONENT_CATEGORY.INGEST];
    this.allowsCreation = false;
    this.baseClasses = documentBaseClasses;

    this.inputs = [];
    this.outputs = [{ label: documentLabel, baseClasses: documentBaseClasses }];
  }
}
6 changes: 6 additions & 0 deletions public/component_types/other/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

export * from './document';
1 change: 1 addition & 0 deletions public/component_types/transformer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@

export * from './ml_transformer';
export * from './text_embedding_transformer';
export * from './sparse_encoder_transformer';
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

import { COMPONENT_CATEGORY, COMPONENT_CLASS } from '../../../common';
import { MLTransformer } from '.';

/**
 * ML transformer UI component specialized for sparse encoding.
 * Takes a single document as input, exposes three create-time fields
 * (model, input field, vector field) and outputs a transformed document.
 */
export class SparseEncoderTransformer extends MLTransformer {
  constructor() {
    super();

    // Both field-name inputs point at the same processor documentation.
    const processorParamsLink =
      'https://opensearch.org/docs/latest/ingest-pipelines/processors/sparse-encoding/#configuration-parameters';
    const modelChoiceLink =
      'https://opensearch.org/docs/latest/ml-commons-plugin/integrating-ml-models/#choosing-a-model';

    // Identity and classification.
    this.type = COMPONENT_CLASS.SPARSE_ENCODER_TRANSFORMER;
    this.label = 'Sparse Encoder';
    this.description = 'A specialized ML transformer to perform sparse encoding';
    this.categories = [COMPONENT_CATEGORY.INGEST];
    // Keep the inherited base classes and append this component's own type.
    this.baseClasses = [...this.baseClasses, this.type];

    // Connections: one document in, one transformed document out.
    this.inputs = [
      {
        id: 'document',
        label: 'Document',
        baseClass: COMPONENT_CLASS.DOCUMENT,
        acceptMultiple: false,
      },
    ];
    this.outputs = [
      {
        label: 'Transformed Document',
        baseClasses: [COMPONENT_CLASS.DOCUMENT],
      },
    ];

    // Fields shown when configuring the component.
    this.createFields = [
      {
        label: 'Sparse Encoding Model',
        id: 'model',
        type: 'model',
        helpText:
          'A sparse encoding model to be used for generating sparse vectors.',
        helpLink: modelChoiceLink,
      },
      {
        label: 'Input Field',
        id: 'inputField',
        type: 'string',
        helpText:
          'The name of the document field from which to obtain text for generating sparse embeddings.',
        helpLink: processorParamsLink,
      },
      {
        label: 'Vector Field',
        id: 'vectorField',
        type: 'string',
        helpText:
          "The name of the document's vector field in which to store the generated sparse embeddings.",
        helpLink: processorParamsLink,
      },
    ];
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

import { COMPONENT_CLASS } from '../../../common';
import { COMPONENT_CATEGORY, COMPONENT_CLASS } from '../../../common';
import { MLTransformer } from '.';

/**
Expand All @@ -13,10 +13,18 @@ export class TextEmbeddingTransformer extends MLTransformer {
constructor() {
super();
this.type = COMPONENT_CLASS.TEXT_EMBEDDING_TRANSFORMER;
this.label = 'Text Embedding Transformer';
this.label = 'Text Embedder';
this.description = 'A specialized ML transformer for embedding text';
this.categories = [COMPONENT_CATEGORY.INGEST];
this.baseClasses = [...this.baseClasses, this.type];
this.inputs = [];
this.inputs = [
{
id: 'document',
label: 'Document',
baseClass: COMPONENT_CLASS.DOCUMENT,
acceptMultiple: false,
},
];
this.createFields = [
{
label: 'Text Embedding Model',
Expand All @@ -31,24 +39,23 @@ export class TextEmbeddingTransformer extends MLTransformer {
id: 'inputField',
type: 'string',
helpText:
'The name of the field from which to obtain text for generating text embeddings.',
'The name of the document field from which to obtain text for generating text embeddings.',
helpLink:
'https://opensearch.org/docs/latest/ingest-pipelines/processors/text-embedding/',
},
{
label: 'Vector Field',
id: 'vectorField',
type: 'string',
helpText:
' The name of the vector field in which to store the generated text embeddings.',
helpText: `The name of the document's vector field in which to store the generated text embeddings.`,
helpLink:
'https://opensearch.org/docs/latest/ingest-pipelines/processors/text-embedding/',
},
];
this.outputs = [
{
label: this.label,
baseClasses: this.baseClasses,
label: 'Transformed Document',
baseClasses: [COMPONENT_CLASS.DOCUMENT],
},
];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

import React, { useState } from 'react';
import { EuiSpacer, EuiText, EuiTitle } from '@elastic/eui';
import { EuiHorizontalRule, EuiSpacer, EuiText, EuiTitle } from '@elastic/eui';
import { InputFieldList } from './input_field_list';
import { NODE_CATEGORY, ReactFlowComponent } from '../../../../common';
import { NewOrExistingTabs } from '../workspace/workspace_components/new_or_existing_tabs';
Expand Down Expand Up @@ -58,11 +58,12 @@ export function ComponentInputs(props: ComponentInputsProps) {
<EuiText color="subdued">
{props.selectedComponent.data.description}
</EuiText>
<NewOrExistingTabs
{/* TODO: Add tabs back once it is finalized how much flexibility we want */}
{/* <NewOrExistingTabs
selectedTabId={selectedTabId}
setSelectedTabId={setSelectedTabId}
/>
<EuiSpacer size="s" />
/> */}
<EuiHorizontalRule size="full" />

<InputFieldList
componentId={props.selectedComponent.id}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ import {
ModelFormValue,
MODEL_CATEGORY,
MPNET_SENTENCE_TRANSFORMER,
NEURAL_SPARSE_TRANSFORMER,
NEURAL_SPARSE_DOC_TRANSFORMER,
NEURAL_SPARSE_TOKENIZER_TRANSFORMER,
} from '../../../../../common';
import { AppState } from '../../../../store';

Expand Down Expand Up @@ -113,6 +116,24 @@ export function ModelField(props: ModelFieldProps) {
category: MODEL_CATEGORY.PRETRAINED,
algorithm: BERT_SENTENCE_TRANSFORMER.algorithm,
},
{
id: NEURAL_SPARSE_TRANSFORMER.name,
name: NEURAL_SPARSE_TRANSFORMER.shortenedName,
category: MODEL_CATEGORY.PRETRAINED,
algorithm: NEURAL_SPARSE_TRANSFORMER.algorithm,
},
{
id: NEURAL_SPARSE_DOC_TRANSFORMER.name,
name: NEURAL_SPARSE_DOC_TRANSFORMER.shortenedName,
category: MODEL_CATEGORY.PRETRAINED,
algorithm: NEURAL_SPARSE_DOC_TRANSFORMER.algorithm,
},
{
id: NEURAL_SPARSE_TOKENIZER_TRANSFORMER.name,
name: NEURAL_SPARSE_TOKENIZER_TRANSFORMER.shortenedName,
category: MODEL_CATEGORY.PRETRAINED,
algorithm: NEURAL_SPARSE_TOKENIZER_TRANSFORMER.algorithm,
},
];
setPretrainedModels(modelItems);
}, []);
Expand All @@ -121,6 +142,8 @@ export function ModelField(props: ModelFieldProps) {
// e.g., only show deployed models when 'deployed' button is selected
useEffect(() => {
if (selectedRadioId !== undefined) {
// TODO: add fine-grained filtering so only relevant pretrained and existing models
// are visible based on the use case
if (selectedRadioId === MODEL_CATEGORY.DEPLOYED) {
setSelectableModels(deployedModels);
} else {
Expand Down
Loading
Loading